From 1e301228600178ed0f597169cf039c96e51d5102 Mon Sep 17 00:00:00 2001 From: roos Date: Tue, 24 Sep 2019 14:48:53 +0200 Subject: [PATCH 1/3] discovery: Add metrics for itopo and idiscovery Metrics added for idiscovery: # HELP idiscovery_file_writes_total The total number of file writes on updated topology # TYPE idiscovery_file_writes_total counter idiscovery_file_writes_total{result="ok_success",type="static"} 1 # HELP idiscovery_sent_requests_total The total number of requests sent to the discovery service # TYPE idiscovery_sent_requests_total counter idiscovery_sent_requests_total{result="err_request",type="dynamic"} 1 idiscovery_sent_requests_total{result="ok_success",type="dynamic"} 8 idiscovery_sent_requests_total{result="ok_success",type="static"} 1 Metrics added for itopo: # HELP itopo_current_active Indicate whether the current topology is active. 0=inactive, 1=active. # TYPE itopo_current_active gauge itopo_current_active{type="dynamic"} 1 itopo_current_active{type="static"} 1 # HELP itopo_current_timestamp The timestamp of the current topology. Remains set, even when inactive. # TYPE itopo_current_timestamp gauge itopo_current_timestamp{type="dynamic"} 1.5693351735612462e+09 itopo_current_timestamp{type="static"} 1.5693351375621471e+09 # HELP itopo_current_ttl_seconds The TTL of the current topology. 0 indicates no TTL. Remains set, even when inactive. # TYPE itopo_current_ttl_seconds gauge itopo_current_ttl_seconds{type="dynamic"} 6e+10 itopo_current_ttl_seconds{type="static"} 0 # HELP itopo_last_updates Timestamp of the last update attempts. # TYPE itopo_last_updates gauge itopo_last_updates{result="ok_success",type="dynamic"} 1.56933517356125e+09 itopo_last_updates{result="ok_success",type="static"} 1.5693351375621483e+09 # HELP itopo_updates_total The total number of updates. 
# TYPE itopo_updates_total counter itopo_updates_total{result="ok_success",type="dynamic"} 8 itopo_updates_total{result="ok_success",type="static"} 2 fixes #3097 --- go/lib/infra/modules/idiscovery/BUILD.bazel | 1 + go/lib/infra/modules/idiscovery/idiscovery.go | 27 +++- .../idiscovery/internal/metrics/BUILD.bazel | 12 ++ .../idiscovery/internal/metrics/metrics.go | 82 +++++++++++ go/lib/infra/modules/itopo/BUILD.bazel | 1 + go/lib/infra/modules/itopo/cleaner.go | 3 +- .../itopo/internal/metrics/BUILD.bazel | 12 ++ .../modules/itopo/internal/metrics/metrics.go | 135 ++++++++++++++++++ go/lib/infra/modules/itopo/itopo.go | 79 ++++++++-- 9 files changed, 338 insertions(+), 14 deletions(-) create mode 100644 go/lib/infra/modules/idiscovery/internal/metrics/BUILD.bazel create mode 100644 go/lib/infra/modules/idiscovery/internal/metrics/metrics.go create mode 100644 go/lib/infra/modules/itopo/internal/metrics/BUILD.bazel create mode 100644 go/lib/infra/modules/itopo/internal/metrics/metrics.go diff --git a/go/lib/infra/modules/idiscovery/BUILD.bazel b/go/lib/infra/modules/idiscovery/BUILD.bazel index da4283cd16..aa87cca07f 100644 --- a/go/lib/infra/modules/idiscovery/BUILD.bazel +++ b/go/lib/infra/modules/idiscovery/BUILD.bazel @@ -15,6 +15,7 @@ go_library( "//go/lib/discovery:go_default_library", "//go/lib/discovery/topofetcher:go_default_library", "//go/lib/fatal:go_default_library", + "//go/lib/infra/modules/idiscovery/internal/metrics:go_default_library", "//go/lib/infra/modules/itopo:go_default_library", "//go/lib/log:go_default_library", "//go/lib/periodic:go_default_library", diff --git a/go/lib/infra/modules/idiscovery/idiscovery.go b/go/lib/infra/modules/idiscovery/idiscovery.go index fc22158cca..08edf97c7e 100644 --- a/go/lib/infra/modules/idiscovery/idiscovery.go +++ b/go/lib/infra/modules/idiscovery/idiscovery.go @@ -44,6 +44,7 @@ import ( "github.com/scionproto/scion/go/lib/discovery" "github.com/scionproto/scion/go/lib/discovery/topofetcher" 
"github.com/scionproto/scion/go/lib/fatal" + "github.com/scionproto/scion/go/lib/infra/modules/idiscovery/internal/metrics" "github.com/scionproto/scion/go/lib/infra/modules/itopo" "github.com/scionproto/scion/go/lib/log" "github.com/scionproto/scion/go/lib/periodic" @@ -329,19 +330,32 @@ func (t *task) Run(ctx context.Context) { func (t *task) handleErr(ctx context.Context, err error) { t.logger(ctx).Error("[discovery] Unable to fetch topology", "err", err) + l := metrics.FetcherLabels{Type: t.metricType(), Result: metrics.ErrRequest} + metrics.Fetcher.Sent(l).Inc() } func (t *task) handleRaw(ctx context.Context, raw common.RawBytes, topo *topology.Topo) { + l := metrics.FetcherLabels{Type: t.metricType(), Result: metrics.Success} updated, err := t.callHandler(ctx, topo) + switch { + case err != nil: + l.Result = metrics.ErrUpdate + case !updated: + l.Result = metrics.OkIgnored + } if err != nil || t.filename == "" || !updated { + metrics.Fetcher.Sent(l).Inc() return } if err := util.WriteFile(t.filename, raw, 0644); err != nil { t.logger(ctx).Error("[discovery] Unable to write new topology to filesystem", "err", err) - return + l.Result = metrics.ErrWriteFile + } else { + t.logger(ctx).Trace("[discovery] Topology written to filesystem", + "file", t.filename, "params", t.fetcher.Params) } - t.logger(ctx).Trace("[discovery] Topology written to filesystem", - "file", t.filename, "params", t.fetcher.Params) + metrics.Fetcher.Sent(l).Inc() + metrics.Fetcher.File(l).Inc() } func (t *task) callHandler(ctx context.Context, topo *topology.Topo) (bool, error) { @@ -358,6 +372,13 @@ func (t *task) logger(ctx context.Context) log.Logger { return log.FromCtx(ctx).New("mode", t.mode) } +func (t *task) metricType() string { + if t.mode == discovery.Static { + return metrics.Static + } + return metrics.Dynamic +} + type flag struct { sync.Mutex set bool diff --git a/go/lib/infra/modules/idiscovery/internal/metrics/BUILD.bazel 
b/go/lib/infra/modules/idiscovery/internal/metrics/BUILD.bazel new file mode 100644 index 0000000000..1508bd108b --- /dev/null +++ b/go/lib/infra/modules/idiscovery/internal/metrics/BUILD.bazel @@ -0,0 +1,12 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["metrics.go"], + importpath = "github.com/scionproto/scion/go/lib/infra/modules/idiscovery/internal/metrics", + visibility = ["//go/lib/infra/modules/idiscovery:__subpackages__"], + deps = [ + "//go/lib/prom:go_default_library", + "@com_github_prometheus_client_golang//prometheus:go_default_library", + ], +) diff --git a/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go b/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go new file mode 100644 index 0000000000..7b1a0813b7 --- /dev/null +++ b/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go @@ -0,0 +1,82 @@ +// Copyright 2019 Anapaya Systems +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/scionproto/scion/go/lib/prom" +) + +// Namespace is the metrics namespace for the infra discovery module. +const Namespace = "idiscovery" + +// Topology types. +const ( + Static = "static" + Dynamic = "dynamic" +) + +// Result labels. 
+const ( + Success = prom.Success + OkIgnored = "ok_ignored" + ErrRequest = "err_request" + ErrUpdate = "err_update" + ErrWriteFile = "err_write_file" +) + +// Metrics initialization. +var ( + Fetcher = newFetcher() +) + +// FetcherLabels defines the requests label set. +type FetcherLabels struct { + Type string + Result string +} + +// Labels returns the name of the labels in correct order. +func (l FetcherLabels) Labels() []string { + return []string{"type", "result"} +} + +// Values returns the values of the label in correct order. +func (l FetcherLabels) Values() []string { + return []string{l.Type, l.Result} +} + +type fetcher struct { + sent *prometheus.CounterVec + file *prometheus.CounterVec +} + +func newFetcher() fetcher { + return fetcher{ + sent: prom.NewCounterVec(Namespace, "", "sent_requests_total", + "The total number of requests sent to the discovey service", FetcherLabels{}.Labels()), + file: prom.NewCounterVec(Namespace, "", "file_writes_total", + "The total number of file writes on updated topology", FetcherLabels{}.Labels()), + } +} + +func (r fetcher) Sent(l FetcherLabels) prometheus.Counter { + return r.sent.WithLabelValues(l.Values()...) +} + +func (r fetcher) File(l FetcherLabels) prometheus.Counter { + return r.file.WithLabelValues(l.Values()...) 
+} diff --git a/go/lib/infra/modules/itopo/BUILD.bazel b/go/lib/infra/modules/itopo/BUILD.bazel index 8c6f0b1da9..4fb350a3e2 100644 --- a/go/lib/infra/modules/itopo/BUILD.bazel +++ b/go/lib/infra/modules/itopo/BUILD.bazel @@ -12,6 +12,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//go/lib/common:go_default_library", + "//go/lib/infra/modules/itopo/internal/metrics:go_default_library", "//go/lib/log:go_default_library", "//go/lib/periodic:go_default_library", "//go/lib/topology:go_default_library", diff --git a/go/lib/infra/modules/itopo/cleaner.go b/go/lib/infra/modules/itopo/cleaner.go index 9c281ea371..44fb99fe14 100644 --- a/go/lib/infra/modules/itopo/cleaner.go +++ b/go/lib/infra/modules/itopo/cleaner.go @@ -44,7 +44,6 @@ func (c cleaner) Run(ctx context.Context) { log.FromCtx(ctx).Info("[itopo.cleaner] Dropping expired dynamic topology", "ts", st.topo.dynamic.Timestamp, "ttl", st.topo.dynamic.TTL, "expired", st.topo.dynamic.Expiry()) - st.topo.dynamic = nil - call(st.clbks.CleanDynamic) + st.dropDynamic() } } diff --git a/go/lib/infra/modules/itopo/internal/metrics/BUILD.bazel b/go/lib/infra/modules/itopo/internal/metrics/BUILD.bazel new file mode 100644 index 0000000000..9512b9724e --- /dev/null +++ b/go/lib/infra/modules/itopo/internal/metrics/BUILD.bazel @@ -0,0 +1,12 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["metrics.go"], + importpath = "github.com/scionproto/scion/go/lib/infra/modules/itopo/internal/metrics", + visibility = ["//go/lib/infra/modules/itopo:__subpackages__"], + deps = [ + "//go/lib/prom:go_default_library", + "@com_github_prometheus_client_golang//prometheus:go_default_library", + ], +) diff --git a/go/lib/infra/modules/itopo/internal/metrics/metrics.go b/go/lib/infra/modules/itopo/internal/metrics/metrics.go new file mode 100644 index 0000000000..3a9de4924e --- /dev/null +++ b/go/lib/infra/modules/itopo/internal/metrics/metrics.go @@ -0,0 +1,135 @@ 
+// Copyright 2019 Anapaya Systems +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/scionproto/scion/go/lib/prom" +) + +// Namespace is the metrics namespace for the infra topology module. +const Namespace = "itopo" + +// Topology types. +const ( + Static = "static" + Dynamic = "dynamic" +) + +// Result labels. +const ( + Success = prom.Success + OkIgnored = "ok_ignored" + ErrValidate = "err_validate" + ErrCommit = "err_commit" +) + +// Metrics initialization. +var ( + Current = newCurrent() + Updates = newUpdates() +) + +// CurrentLabels defines the current topology label set. +type CurrentLabels struct { + Type string +} + +// Labels returns the name of the labels in correct order. +func (l CurrentLabels) Labels() []string { + return []string{"type"} +} + +// Values returns the values of the label in correct order. +func (l CurrentLabels) Values() []string { + return []string{l.Type} +} + +type current struct { + timestamp *prometheus.GaugeVec + ttl *prometheus.GaugeVec + active *prometheus.GaugeVec +} + +func newCurrent() current { + return current{ + timestamp: prom.NewGaugeVec(Namespace, "", "current_timestamp", + "The timestamp of the current topology. Remains set, even when inactive.", + CurrentLabels{}.Labels()), + ttl: prom.NewGaugeVec(Namespace, "", "current_ttl_seconds", + "The TTL of the current topology. 0 indicates no TTL. 
Remains set, even when inactive.", + CurrentLabels{}.Labels()), + active: prom.NewGaugeVec(Namespace, "", "current_active", + "Indicate whether the current topology is active. 0=inactive, 1=active.", + CurrentLabels{}.Labels()), + } +} + +func (c current) Timestamp(l CurrentLabels) prometheus.Gauge { + return c.timestamp.WithLabelValues(l.Values()...) +} + +func (c current) TTL(l CurrentLabels) prometheus.Gauge { + return c.ttl.WithLabelValues(l.Values()...) +} + +func (c current) Active(l CurrentLabels) prometheus.Gauge { + return c.active.WithLabelValues(l.Values()...) +} + +// UpdateLabels defines the update label set. +type UpdateLabels struct { + Type string + Result string +} + +// Labels returns the name of the labels in correct order. +func (l UpdateLabels) Labels() []string { + return []string{"type", prom.LabelResult} +} + +// Values returns the values of the label in correct order. +func (l UpdateLabels) Values() []string { + return []string{l.Type, l.Result} +} + +// WithResult returns the label set with the modified result. +func (l UpdateLabels) WithResult(result string) UpdateLabels { + l.Result = result + return l +} + +type updates struct { + last *prometheus.GaugeVec + total *prometheus.CounterVec +} + +func newUpdates() updates { + return updates{ + last: prom.NewGaugeVec(Namespace, "", "last_updates", + "Timestamp of the last update attempts.", UpdateLabels{}.Labels()), + total: prom.NewCounterVec(Namespace, "", "updates_total", + "The total number of updates.", UpdateLabels{}.Labels()), + } +} + +func (u updates) Last(l UpdateLabels) prometheus.Gauge { + return u.last.WithLabelValues(l.Values()...) +} + +func (u updates) Total(l UpdateLabels) prometheus.Counter { + return u.total.WithLabelValues(l.Values()...) 
+} diff --git a/go/lib/infra/modules/itopo/itopo.go b/go/lib/infra/modules/itopo/itopo.go index e4f71b3d1b..9923c42f1e 100644 --- a/go/lib/infra/modules/itopo/itopo.go +++ b/go/lib/infra/modules/itopo/itopo.go @@ -23,6 +23,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/scionproto/scion/go/lib/common" + "github.com/scionproto/scion/go/lib/infra/modules/itopo/internal/metrics" "github.com/scionproto/scion/go/lib/log" "github.com/scionproto/scion/go/lib/topology" "github.com/scionproto/scion/go/proto" @@ -77,14 +78,29 @@ func Get() *topology.Topo { // to the currently active topology at the end of the function call. It might differ from // the input topology. The second return value indicates whether the in-memory // copy of the dynamic topology has been updated. -func SetDynamic(static *topology.Topo) (*topology.Topo, bool, error) { - return st.setDynamic(static) +func SetDynamic(dynamic *topology.Topo) (*topology.Topo, bool, error) { + l := metrics.UpdateLabels{Type: metrics.Dynamic} + topo, updated, err := st.setDynamic(dynamic) + switch { + case err != nil: + l.Result = metrics.ErrValidate + case updated: + l.Result = metrics.Success + default: + l.Result = metrics.OkIgnored + } + incUpdateMetric(l) + return topo, updated, err } // BeginSetDynamic checks whether setting the dynamic topology is permissible. The returned // transaction provides a view on which topology would be active, if committed. func BeginSetDynamic(dynamic *topology.Topo) (Transaction, error) { - return st.beginSetDynamic(dynamic) + tx, err := st.beginSetDynamic(dynamic) + if err != nil { + incUpdateMetric(metrics.UpdateLabels{Type: metrics.Dynamic, Result: metrics.ErrValidate}) + } + return tx, err } // SetStatic atomically sets the static topology. Whether semi-mutable fields are @@ -94,13 +110,28 @@ func BeginSetDynamic(dynamic *topology.Topo) (Transaction, error) { // or dynamic set and still valid). 
The second return value indicates whether the in-memory // copy of the static topology has been updated. func SetStatic(static *topology.Topo, semiMutAllowed bool) (*topology.Topo, bool, error) { - return st.setStatic(static, semiMutAllowed) + l := metrics.UpdateLabels{Type: metrics.Static} + topo, updated, err := st.setStatic(static, semiMutAllowed) + switch { + case err != nil: + l.Result = metrics.ErrValidate + case updated: + l.Result = metrics.Success + default: + l.Result = metrics.OkIgnored + } + incUpdateMetric(l) + return topo, updated, err } // BeginSetStatic checks whether setting the static topology is permissible. The returned // transaction provides a view on which topology would be active, if committed. func BeginSetStatic(static *topology.Topo, semiMutAllowed bool) (Transaction, error) { - return st.beginSetStatic(static, semiMutAllowed) + tx, err := st.beginSetStatic(static, semiMutAllowed) + if err != nil { + incUpdateMetric(metrics.UpdateLabels{Type: metrics.Static, Result: metrics.ErrValidate}) + } + return tx, err } // Transaction allows to get a view on which topology will be active without committing @@ -122,19 +153,27 @@ type Transaction struct { func (tx *Transaction) Commit() error { st.Lock() defer st.Unlock() + l := metrics.UpdateLabels{Type: metrics.Dynamic} + if tx.inputStatic != nil { + l.Type = metrics.Static + } if tx.staticAtTxStart != st.topo.static { + incUpdateMetric(l.WithResult(metrics.ErrCommit)) return common.NewBasicError("Static topology changed in the meantime", nil) } if !tx.IsUpdate() { + incUpdateMetric(l.WithResult(metrics.OkIgnored)) return nil } // Do transaction for static topology updated. if tx.inputStatic != nil { st.updateStatic(tx.inputStatic) + incUpdateMetric(l.WithResult(metrics.Success)) return nil } // Do transaction from dynamic topology update. 
- st.topo.dynamic = tx.inputDynamic + st.updateDynamic(tx.inputDynamic) + incUpdateMetric(l.WithResult(metrics.Success)) return nil } @@ -195,10 +234,24 @@ func (s *state) setDynamic(dynamic *topology.Topo) (*topology.Topo, bool, error) if keepOld(dynamic, s.topo.dynamic) { return s.topo.Get(), false, nil } - s.topo.dynamic = dynamic + s.updateDynamic(dynamic) return s.topo.Get(), true, nil } +func (s *state) updateDynamic(dynamic *topology.Topo) { + s.topo.dynamic = dynamic + cl := metrics.CurrentLabels{Type: metrics.Dynamic} + metrics.Current.Active(cl).Set(1) + metrics.Current.Timestamp(cl).SetToCurrentTime() + metrics.Current.TTL(cl).Set(float64(dynamic.TTL)) +} + +func (s *state) dropDynamic() { + s.topo.dynamic = nil + call(s.clbks.DropDynamic) + metrics.Current.Active(metrics.CurrentLabels{Type: metrics.Dynamic}).Set(0) +} + func (s *state) beginSetDynamic(dynamic *topology.Topo) (Transaction, error) { s.Lock() defer s.Unlock() @@ -276,11 +329,14 @@ func (s *state) beginSetStatic(static *topology.Topo, allowed bool) (Transaction func (s *state) updateStatic(static *topology.Topo) { // Drop dynamic topology if necessary. 
if s.validator.MustDropDynamic(static, s.topo.static) && s.topo.dynamic != nil { - s.topo.dynamic = nil - call(s.clbks.DropDynamic) + s.dropDynamic() } s.topo.static = static call(s.clbks.UpdateStatic) + cl := metrics.CurrentLabels{Type: metrics.Static} + metrics.Current.Active(cl).Set(1) + metrics.Current.Timestamp(cl).SetToCurrentTime() + metrics.Current.TTL(cl).Set(float64(static.TTL)) } func keepOld(newTopo, oldTopo *topology.Topo) bool { @@ -309,3 +365,8 @@ func call(clbk func()) { }() } } + +func incUpdateMetric(l metrics.UpdateLabels) { + metrics.Updates.Last(l).SetToCurrentTime() + metrics.Updates.Total(l).Inc() +} From b9a37647cd2e24b658e21b219ee860a6a660867e Mon Sep 17 00:00:00 2001 From: roos Date: Wed, 25 Sep 2019 09:46:22 +0200 Subject: [PATCH 2/3] feedback --- go/lib/infra/modules/idiscovery/idiscovery.go | 33 +++++++++++-------- .../idiscovery/internal/metrics/metrics.go | 24 +++++++++----- .../modules/itopo/internal/metrics/metrics.go | 11 +++++-- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/go/lib/infra/modules/idiscovery/idiscovery.go b/go/lib/infra/modules/idiscovery/idiscovery.go index 08edf97c7e..f9be2f5be6 100644 --- a/go/lib/infra/modules/idiscovery/idiscovery.go +++ b/go/lib/infra/modules/idiscovery/idiscovery.go @@ -330,12 +330,12 @@ func (t *task) Run(ctx context.Context) { func (t *task) handleErr(ctx context.Context, err error) { t.logger(ctx).Error("[discovery] Unable to fetch topology", "err", err) - l := metrics.FetcherLabels{Type: t.metricType(), Result: metrics.ErrRequest} + l := metrics.FetcherLabels{Static: t.static(), Result: metrics.ErrRequest} metrics.Fetcher.Sent(l).Inc() } func (t *task) handleRaw(ctx context.Context, raw common.RawBytes, topo *topology.Topo) { - l := metrics.FetcherLabels{Type: t.metricType(), Result: metrics.Success} + l := metrics.FetcherLabels{Static: t.static(), Result: metrics.Success} updated, err := t.callHandler(ctx, topo) switch { case err != nil: @@ -347,15 +347,23 @@ func (t 
*task) handleRaw(ctx context.Context, raw common.RawBytes, topo *topolog metrics.Fetcher.Sent(l).Inc() return } - if err := util.WriteFile(t.filename, raw, 0644); err != nil { - t.logger(ctx).Error("[discovery] Unable to write new topology to filesystem", "err", err) + if err := t.writeFile(ctx, raw); err != nil { l.Result = metrics.ErrWriteFile - } else { - t.logger(ctx).Trace("[discovery] Topology written to filesystem", - "file", t.filename, "params", t.fetcher.Params) } metrics.Fetcher.Sent(l).Inc() +} + +func (t *task) writeFile(ctx context.Context, raw common.RawBytes) error { + l := metrics.FetcherLabels{Static: t.static(), Result: metrics.Success} + if err := util.WriteFile(t.filename, raw, 0644); err != nil { + t.logger(ctx).Error("[discovery] Unable to write new topology to filesystem", "err", err) + metrics.Fetcher.File(l.WithResult(metrics.ErrWriteFile)).Inc() + return err + } + t.logger(ctx).Trace("[discovery] Topology written to filesystem", "file", t.filename, + "params", t.fetcher.Params) metrics.Fetcher.File(l).Inc() + return nil } func (t *task) callHandler(ctx context.Context, topo *topology.Topo) (bool, error) { @@ -368,15 +376,12 @@ func (t *task) callHandler(ctx context.Context, topo *topology.Topo) (bool, erro return updated, err } -func (t *task) logger(ctx context.Context) log.Logger { - return log.FromCtx(ctx).New("mode", t.mode) +func (t *task) static() bool { + return t.mode == discovery.Static } -func (t *task) metricType() string { - if t.mode == discovery.Static { - return metrics.Static - } - return metrics.Dynamic +func (t *task) logger(ctx context.Context) log.Logger { + return log.FromCtx(ctx).New("mode", t.mode) } type flag struct { diff --git a/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go b/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go index 7b1a0813b7..825fbe7b29 100644 --- a/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go +++ b/go/lib/infra/modules/idiscovery/internal/metrics/metrics.go 
@@ -38,14 +38,12 @@ const ( ErrWriteFile = "err_write_file" ) -// Metrics initialization. -var ( - Fetcher = newFetcher() -) +// Fetcher is the single-instance struct to get prometheus counters. +var Fetcher = newFetcher() // FetcherLabels defines the requests label set. type FetcherLabels struct { - Type string + Static bool Result string } @@ -56,12 +54,20 @@ func (l FetcherLabels) Labels() []string { // Values returns the values of the label in correct order. func (l FetcherLabels) Values() []string { - return []string{l.Type, l.Result} + if l.Static { + return []string{Static, l.Result} + } + return []string{Dynamic, l.Result} +} + +// WithResult returns the label set with the modified result. +func (l FetcherLabels) WithResult(result string) FetcherLabels { + l.Result = result + return l } type fetcher struct { - sent *prometheus.CounterVec - file *prometheus.CounterVec + sent, file *prometheus.CounterVec } func newFetcher() fetcher { @@ -73,10 +79,12 @@ func newFetcher() fetcher { } } +// Sent returns the prometheus counter. func (r fetcher) Sent(l FetcherLabels) prometheus.Counter { return r.sent.WithLabelValues(l.Values()...) } +// File returns the prometheus counter. func (r fetcher) File(l FetcherLabels) prometheus.Counter { return r.file.WithLabelValues(l.Values()...) } diff --git a/go/lib/infra/modules/itopo/internal/metrics/metrics.go b/go/lib/infra/modules/itopo/internal/metrics/metrics.go index 3a9de4924e..92292a39fa 100644 --- a/go/lib/infra/modules/itopo/internal/metrics/metrics.go +++ b/go/lib/infra/modules/itopo/internal/metrics/metrics.go @@ -37,9 +37,10 @@ const ( ErrCommit = "err_commit" ) -// Metrics initialization. var ( + // Current is the single-instance struct to get prometheus gauges. Current = newCurrent() + // Updates is the single-instance struct to get prometheus counters and gauges. Updates = newUpdates() ) @@ -78,22 +79,24 @@ func newCurrent() current { } } +// Timestamp returns the prometheus gauge. 
func (c current) Timestamp(l CurrentLabels) prometheus.Gauge { return c.timestamp.WithLabelValues(l.Values()...) } +// TTL returns the prometheus gauge. func (c current) TTL(l CurrentLabels) prometheus.Gauge { return c.ttl.WithLabelValues(l.Values()...) } +// Active returns the prometheus gauge. func (c current) Active(l CurrentLabels) prometheus.Gauge { return c.active.WithLabelValues(l.Values()...) } // UpdateLabels defines the update label set. type UpdateLabels struct { - Type string - Result string + Type, Result string } // Labels returns the name of the labels in correct order. @@ -126,10 +129,12 @@ func newUpdates() updates { } } +// Last returns the prometheus gauge. func (u updates) Last(l UpdateLabels) prometheus.Gauge { return u.last.WithLabelValues(l.Values()...) } +// Total returns the prometheus counter. func (u updates) Total(l UpdateLabels) prometheus.Counter { return u.total.WithLabelValues(l.Values()...) } From cbc4a6df036757b49a5836d85df3779b7b7c6c4b Mon Sep 17 00:00:00 2001 From: roos Date: Wed, 25 Sep 2019 13:57:14 +0200 Subject: [PATCH 3/3] Updated metrics: - itopo_creation_time_seconds (-> 0 when not set) - itopo_expiry_time_seconds (-> +Inf when not set) - itopo_dynamic_active Metrics added for idiscovery: # HELP idiscovery_file_writes_total The total number of file writes on updated topology # TYPE idiscovery_file_writes_total counter idiscovery_file_writes_total{result="ok_success",type="static"} 1 # HELP idiscovery_sent_requests_total The total number of requests sent to the discovey service # TYPE idiscovery_sent_requests_total counter idiscovery_sent_requests_total{result="err_request",type="dynamic"} 1 idiscovery_sent_requests_total{result="ok_success",type="dynamic"} 8 idiscovery_sent_requests_total{result="ok_success",type="static"} 1 Metrics added for itopo: # HELP itopo_creation_time_seconds The creation time specified in the current topology.Remains set for dynamic topology, even when inactive. 
# TYPE itopo_creation_time_seconds gauge itopo_creation_time_seconds{type="static"} 0 itopo_creation_time_seconds{type="dynamic"} 1.5693351735612462e+09 # HELP itopo_dynamic_active Indicate whether the dynamic topology is set and active. 0=inactive, 1=active. # TYPE itopo_dynamic_active gauge itopo_dynamic_active 0 # HELP itopo_expiry_time_seconds The expiry time specified in the current topology. Set to +Inf, if TTL is zero.Remains set for dynamic topology, even when inactive. # TYPE itopo_expiry_time_seconds gauge itopo_expiry_time_seconds{type="static"} +Inf itopo_expiry_time_seconds{type="dynamic"} 1.5693351735612462e+09 # HELP itopo_last_updates Timestamp of the last update attempts. # TYPE itopo_last_updates gauge itopo_last_updates{result="ok_success",type="dynamic"} 1.56933517356125e+09 itopo_last_updates{result="ok_success",type="static"} 1.5693351375621483e+09 # HELP itopo_updates_total The total number of updates. # TYPE itopo_updates_total counter itopo_updates_total{result="ok_success",type="dynamic"} 8 itopo_updates_total{result="ok_success",type="static"} 2 --- .../modules/itopo/internal/metrics/metrics.go | 51 +++++++++++++------ go/lib/infra/modules/itopo/itopo.go | 13 +++-- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/go/lib/infra/modules/itopo/internal/metrics/metrics.go b/go/lib/infra/modules/itopo/internal/metrics/metrics.go index 92292a39fa..fda70bba49 100644 --- a/go/lib/infra/modules/itopo/internal/metrics/metrics.go +++ b/go/lib/infra/modules/itopo/internal/metrics/metrics.go @@ -15,6 +15,9 @@ package metrics import ( + "math" + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/scionproto/scion/go/lib/prom" @@ -61,21 +64,20 @@ func (l CurrentLabels) Values() []string { type current struct { timestamp *prometheus.GaugeVec - ttl *prometheus.GaugeVec - active *prometheus.GaugeVec + expiry *prometheus.GaugeVec + active prometheus.Gauge } func newCurrent() current { return current{ - timestamp: 
prom.NewGaugeVec(Namespace, "", "current_timestamp", - "The timestamp of the current topology. Remains set, even when inactive.", - CurrentLabels{}.Labels()), - ttl: prom.NewGaugeVec(Namespace, "", "current_ttl_seconds", - "The TTL of the current topology. 0 indicates no TTL. Remains set, even when inactive.", - CurrentLabels{}.Labels()), - active: prom.NewGaugeVec(Namespace, "", "current_active", - "Indicate whether the current topology is active. 0=inactive, 1=active.", - CurrentLabels{}.Labels()), + timestamp: prom.NewGaugeVec(Namespace, "", "creation_time_seconds", + "The creation time specified in the current topology."+ + "Remains set for dynamic topology, even when inactive.", CurrentLabels{}.Labels()), + expiry: prom.NewGaugeVec(Namespace, "", "expiry_time_seconds", + "The expiry time specified in the current topology. Set to +Inf, if TTL is zero."+ + "Remains set for dynamic topology, even when inactive.", CurrentLabels{}.Labels()), + active: prom.NewGauge(Namespace, "", "dynamic_active", + "Indicate whether the dynamic topology is set and active. 0=inactive, 1=active."), } } @@ -84,14 +86,14 @@ func (c current) Timestamp(l CurrentLabels) prometheus.Gauge { return c.timestamp.WithLabelValues(l.Values()...) } -// TTL returns the prometheus gauge. -func (c current) TTL(l CurrentLabels) prometheus.Gauge { - return c.ttl.WithLabelValues(l.Values()...) +// Expiry returns the prometheus gauge. +func (c current) Expiry(l CurrentLabels) prometheus.Gauge { + return c.expiry.WithLabelValues(l.Values()...) } // Active returns the prometheus gauge. -func (c current) Active(l CurrentLabels) prometheus.Gauge { - return c.active.WithLabelValues(l.Values()...) +func (c current) Active() prometheus.Gauge { + return c.active } // UpdateLabels defines the update label set. @@ -138,3 +140,20 @@ func (u updates) Last(l UpdateLabels) prometheus.Gauge { func (u updates) Total(l UpdateLabels) prometheus.Counter { return u.total.WithLabelValues(l.Values()...) 
} + +// Timestamp returns the time as unix time in seconds. +func Timestamp(ts time.Time) float64 { + if ts.IsZero() { + return 0 + } + return float64(ts.UnixNano() / 1e9) +} + +// Expiry returns the expiry time as unix time in seconds. In case of the zero +// value, +inf is returned. +func Expiry(ts time.Time) float64 { + if ts.IsZero() { + return math.Inf(+1) + } + return float64(ts.UnixNano() / 1e9) +} diff --git a/go/lib/infra/modules/itopo/itopo.go b/go/lib/infra/modules/itopo/itopo.go index 9923c42f1e..fb0bf7752d 100644 --- a/go/lib/infra/modules/itopo/itopo.go +++ b/go/lib/infra/modules/itopo/itopo.go @@ -241,15 +241,15 @@ func (s *state) setDynamic(dynamic *topology.Topo) (*topology.Topo, bool, error) func (s *state) updateDynamic(dynamic *topology.Topo) { s.topo.dynamic = dynamic cl := metrics.CurrentLabels{Type: metrics.Dynamic} - metrics.Current.Active(cl).Set(1) - metrics.Current.Timestamp(cl).SetToCurrentTime() - metrics.Current.TTL(cl).Set(float64(dynamic.TTL)) + metrics.Current.Active().Set(1) + metrics.Current.Timestamp(cl).Set(metrics.Timestamp(dynamic.Timestamp)) + metrics.Current.Expiry(cl).Set(metrics.Expiry(dynamic.Expiry())) } func (s *state) dropDynamic() { s.topo.dynamic = nil call(s.clbks.DropDynamic) - metrics.Current.Active(metrics.CurrentLabels{Type: metrics.Dynamic}).Set(0) + metrics.Current.Active().Set(0) } func (s *state) beginSetDynamic(dynamic *topology.Topo) (Transaction, error) { @@ -334,9 +334,8 @@ func (s *state) updateStatic(static *topology.Topo) { s.topo.static = static call(s.clbks.UpdateStatic) cl := metrics.CurrentLabels{Type: metrics.Static} - metrics.Current.Active(cl).Set(1) - metrics.Current.Timestamp(cl).SetToCurrentTime() - metrics.Current.TTL(cl).Set(float64(static.TTL)) + metrics.Current.Timestamp(cl).Set(metrics.Timestamp(static.Timestamp)) + metrics.Current.Expiry(cl).Set(metrics.Expiry(static.Expiry())) } func keepOld(newTopo, oldTopo *topology.Topo) bool {