diff --git a/prober/dns.go b/prober/dns.go index 5ecb6f07..4b3aeac0 100644 --- a/prober/dns.go +++ b/prober/dns.go @@ -126,6 +126,10 @@ func validRcode(rcode int, valid []string, logger log.Logger) bool { func ProbeDNS(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) bool { var dialProtocol string + probeDNSDurationGaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "probe_dns_duration_seconds", + Help: "Duration of DNS request by phase", + }, []string{"phase"}) probeDNSAnswerRRSGauge := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "probe_dns_answer_rrs", Help: "Returns number of entries in the answer resource record list", @@ -138,6 +142,12 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry Name: "probe_dns_additional_rrs", Help: "Returns number of entries in the additional resource record list", }) + + for _, lv := range []string{"resolve", "connect", "request"} { + probeDNSDurationGaugeVec.WithLabelValues(lv) + } + + registry.MustRegister(probeDNSDurationGaugeVec) registry.MustRegister(probeDNSAnswerRRSGauge) registry.MustRegister(probeDNSAuthorityRRSGauge) registry.MustRegister(probeDNSAdditionalRRSGauge) @@ -182,11 +192,12 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry } targetAddr = target } - ip, _, err = chooseProtocol(ctx, module.DNS.IPProtocol, module.DNS.IPProtocolFallback, targetAddr, registry, logger) + ip, lookupTime, err := chooseProtocol(ctx, module.DNS.IPProtocol, module.DNS.IPProtocolFallback, targetAddr, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return false } + probeDNSDurationGaugeVec.WithLabelValues("resolve").Add(lookupTime) targetIP := net.JoinHostPort(ip.String(), port) if ip.IP.To4() == nil { @@ -246,7 +257,14 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry level.Info(logger).Log("msg", "Making DNS query", "target", targetIP, "dial_protocol", dialProtocol, "query", module.DNS.QueryName, "type", qt, "class", qc) timeoutDeadline, _ := ctx.Deadline() client.Timeout = time.Until(timeoutDeadline) - response, _, err := client.Exchange(msg, targetIP) + requestStart := time.Now() + response, rtt, err := client.Exchange(msg, targetIP) + // The rtt value returned from client.Exchange includes only the time to + // exchange messages with the server _after_ the connection is created. + // We compute the connection time as the total time for the operation + // minus the time for the actual request rtt. + probeDNSDurationGaugeVec.WithLabelValues("connect").Set((time.Since(requestStart) - rtt).Seconds()) + probeDNSDurationGaugeVec.WithLabelValues("request").Set(rtt.Seconds()) if err != nil { level.Error(logger).Log("msg", "Error while sending a DNS query", "err", err) return false diff --git a/prober/dns_test.go b/prober/dns_test.go index 75dc7c33..ef2d2b0c 100644 --- a/prober/dns_test.go +++ b/prober/dns_test.go @@ -570,3 +570,49 @@ func TestDNSProtocol(t *testing.T) { } } + +// TestDNSMetrics checks that calling ProbeDNS populates the expected +// set of metrics for a DNS probe, but it does not test that those +// metrics contain specific values. +func TestDNSMetrics(t *testing.T) { + server, addr := startDNSServer("udp", recursiveDNSHandler) + defer server.Shutdown() + + _, port, _ := net.SplitHostPort(addr.String()) + + module := config.Module{ + Timeout: time.Second, + DNS: config.DNSProbe{ + IPProtocol: "ip4", + IPProtocolFallback: true, + QueryName: "example.com", + }, + } + registry := prometheus.NewRegistry() + testCTX, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + result := ProbeDNS(testCTX, net.JoinHostPort("localhost", port), module, registry, log.NewNopLogger()) + if !result { + t.Fatalf("DNS test connection failed, expected success.") + } + mfs, err := registry.Gather() + if err != nil { + t.Fatal(err) + } + + expectedMetrics := map[string]map[string]map[string]struct{}{ + "probe_dns_lookup_time_seconds": nil, + "probe_dns_duration_seconds": { + "phase": { + "resolve": {}, + "connect": {}, + "request": {}, + }, + }, + "probe_dns_answer_rrs": nil, + "probe_dns_authority_rrs": nil, + "probe_dns_additional_rrs": nil, + } + + checkMetrics(expectedMetrics, mfs, t) +} diff --git a/prober/http_test.go b/prober/http_test.go index 3d3f63e7..b2c28ae0 100644 --- a/prober/http_test.go +++ b/prober/http_test.go @@ -702,42 +702,19 @@ func TestHTTPPhases(t *testing.T) { t.Fatal(err) } - found := false - foundLabel := map[string]bool{ - "connect": false, - "processing": false, - "resolve": false, - "transfer": false, - "tls": false, - } - for _, mf := range mfs { - if mf.GetName() == "probe_http_duration_seconds" { - found = true - for _, metric := range mf.GetMetric() { - for _, lp := range metric.Label { - if lp.GetName() == "phase" { - f, ok := foundLabel[lp.GetValue()] - if !ok { - t.Fatalf("Unexpected label phase=%s", lp.GetValue()) - } - if f { - t.Fatalf("Label phase=%s duplicated", lp.GetValue()) - } - foundLabel[lp.GetValue()] = true - } - } - } - - } - } - if !found { - t.Fatal("probe_http_duration_seconds not found") - } - for lv, found := range foundLabel { - if !found { - t.Fatalf("Label phase=%s not found", lv) - } + expectedMetrics := map[string]map[string]map[string]struct{}{ + "probe_http_duration_seconds": { + "phase": { + "connect": {}, + "processing": {}, + "resolve": {}, + "transfer": {}, + "tls": {}, + }, + }, } + + checkMetrics(expectedMetrics, mfs, t) } func TestCookieJar(t *testing.T) { diff --git a/prober/utils_test.go b/prober/utils_test.go index 2ac38eeb..a53ee490 100644 --- a/prober/utils_test.go +++ b/prober/utils_test.go @@ -171,3 +171,83 @@ func TestChooseProtocol(t *testing.T) { t.Error("without fallback it should not answer") } } + +func checkMetrics(expected map[string]map[string]map[string]struct{}, mfs []*dto.MetricFamily, t *testing.T) { + type ( + valueValidation struct { + found bool + } + labelValidation struct { + found bool + values map[string]valueValidation + } + metricValidation struct { + found bool + labels map[string]labelValidation + } + ) + + foundMetrics := map[string]metricValidation{} + + for mname, labels := range expected { + var mv metricValidation + if labels != nil { + mv.labels = map[string]labelValidation{} + for lname, values := range labels { + var lv labelValidation + if values != nil { + lv.values = map[string]valueValidation{} + for vname, _ := range values { + lv.values[vname] = valueValidation{} + } + } + mv.labels[lname] = lv + } + } + foundMetrics[mname] = mv + } + + for _, mf := range mfs { + info, wanted := foundMetrics[mf.GetName()] + if !wanted { + continue + } + info.found = true + for _, metric := range mf.GetMetric() { + if info.labels == nil { + continue + } + for _, lp := range metric.Label { + if label, labelWanted := info.labels[lp.GetName()]; labelWanted { + label.found = true + if label.values != nil { + if value, wanted := label.values[lp.GetValue()]; !wanted { + t.Fatalf("Unexpected label %s=%s", lp.GetName(), lp.GetValue()) + } else if value.found { + t.Fatalf("Label %s=%s duplicated", lp.GetName(), lp.GetValue()) + } + label.values[lp.GetValue()] = valueValidation{found: true} + } + info.labels[lp.GetName()] = label + } + } + } + foundMetrics[mf.GetName()] = info + } + + for mname, m := range foundMetrics { + if !m.found { + t.Fatalf("metric %s wanted, not found", mname) + } + for lname, label := range m.labels { + if !label.found { + t.Fatalf("metric %s, label %s wanted, not found", mname, lname) + } + for vname, value := range label.values { + if !value.found { + t.Fatalf("metric %s, label %s, value %s wanted, not found", mname, lname, vname) + } + } + } + } +}