From cd6271c7284bf0cd96daf619ce9ae526d0126d9b Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Thu, 21 Nov 2019 12:44:43 +0100 Subject: [PATCH 1/3] test/e2e: test alerts and rules API endpoints Signed-off-by: Simon Pasquier --- test/e2e/rule_test.go | 46 ++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index d8c840c82c..272a3ed98a 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -189,7 +189,7 @@ func TestRule(t *testing.T) { return nil })) - // checks counter ensures we are not missing metrics. + // The checks counter ensures that we are not missing metrics. checks := 0 // Check metrics to make sure we report correct ones that allow handling the AlwaysFiring not being triggered because of query issue. testutil.Ok(t, promclient.MetricValues(ctx, nil, urlParse(t, r1.HTTP.URL()), func(lset labels.Labels, val float64) error { @@ -204,6 +204,15 @@ func TestRule(t *testing.T) { return nil })) testutil.Equals(t, 2, checks) + + // Verify API endpoints. + for _, endpoint := range []string{"/api/v1/rules", "/api/v1/alerts"} { + for _, r := range []*serverScheduler{r1, r2} { + code, _, err := getAPIEndpoint(ctx, r.HTTP.URL()+endpoint) + testutil.Ok(t, err) + testutil.Equals(t, 200, code) + } + } } type failingStoreAPI struct{} @@ -438,27 +447,17 @@ func TestRulePartialResponse(t *testing.T) { // TODO(bwplotka): Move to promclient. func queryAlertmanagerAlerts(ctx context.Context, url string) ([]*model.Alert, error) { - req, err := http.NewRequest("GET", url+"/api/v1/alerts", nil) + code, body, err := getAPIEndpoint(ctx, url+"/api/v1/alerts") if err != nil { return nil, err } - req = req.WithContext(ctx) - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err + if code != 200 { + return nil, errors.Errorf("expected 200 response, got %d", code) } - defer runutil.CloseWithLogOnErr(nil, resp.Body, "close body query alertmanager") var v struct { Data []*model.Alert `json:"data"` } - - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return nil, err - } - if err = json.Unmarshal(body, &v); err != nil { return nil, err } @@ -468,3 +467,22 @@ func queryAlertmanagerAlerts(ctx context.Context, url string) ([]*model.Alert, e }) return v.Data, nil } + +func getAPIEndpoint(ctx context.Context, url string) (int, []byte, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return 0, nil, err + } + req = req.WithContext(ctx) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return 0, nil, err + } + defer runutil.CloseWithLogOnErr(nil, resp.Body, "%s: close body", req.URL.String()) + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return 0, nil, err + } + return resp.StatusCode, body, nil +} From cbfac080e50344d719593966595d6bf19ba11a44 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Thu, 21 Nov 2019 14:10:04 +0100 Subject: [PATCH 2/3] pkg/rule: fix /api/v1/rules endpoint Signed-off-by: Simon Pasquier --- CHANGELOG.md | 1 + pkg/rule/api/v1.go | 6 +- pkg/rule/api/v1_test.go | 119 +++++++++++++++++++++------------------- pkg/rule/rule.go | 25 ++++----- 4 files changed, 78 insertions(+), 73 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f0ede657..68b9abfb83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1669](https://github.com/thanos-io/thanos/pull/1669) Fixed store sharding. Now it does not load excluded meta.jsons and load/fetch index-cache.json files. - [#1670](https://github.com/thanos-io/thanos/pull/1670) Fixed un-ordered blocks upload. Sidecar now uploads the oldest blocks first. - [#1568](https://github.com/thanos-io/thanos/pull/1709) Thanos Store now retains the first raw value of a chunk during downsampling to avoid losing some counter resets that occur on an aggregation boundary. +- [#1773](https://github.com/thanos-io/thanos/pull/1773) Thanos Ruler: fixed the /api/v1/rules endpoint. ### Changed diff --git a/pkg/rule/api/v1.go b/pkg/rule/api/v1.go index 9b6d41a030..f49eb72291 100644 --- a/pkg/rule/api/v1.go +++ b/pkg/rule/api/v1.go @@ -84,7 +84,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { } switch rule := r.(type) { - case thanosrule.AlertingRule: + case *rules.AlertingRule: enrichedRule = alertingRule{ Name: rule.Name(), Query: rule.Query().String(), @@ -95,7 +95,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { Health: rule.Health(), LastError: lastError, Type: "alerting", - PartialResponseStrategy: rule.PartialResponseStrategy.String(), + PartialResponseStrategy: grp.PartialResponseStrategy.String(), } case *rules.RecordingRule: enrichedRule = recordingRule{ @@ -107,7 +107,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { Type: "recording", } default: - err := fmt.Errorf("failed to assert type of rule '%v'", rule.Name()) + err := fmt.Errorf("rule %q: unsupported type %T", r.Name(), rule) return nil, nil, &qapi.ApiError{Typ: qapi.ErrorInternal, Err: err} } diff --git a/pkg/rule/api/v1_test.go b/pkg/rule/api/v1_test.go index 96a3a348fb..a2affc8d0a 100644 --- a/pkg/rule/api/v1_test.go +++ b/pkg/rule/api/v1_test.go @@ -62,8 +62,6 @@ type rulesRetrieverMock struct { } func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { - var ar rulesRetrieverMock - arules := ar.AlertingRules() storage := newStorage(m.testing) engineOpts := promql.EngineOpts{ @@ -83,9 +81,8 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { } var r []rules.Rule - - for _, alertrule := range arules { - r = append(r, alertrule) + for _, ar := range alertingRules() { + r = append(r, ar) } recordingExpr, err := promql.ParseExpr(`vector(1)`) @@ -96,43 +93,49 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { r = append(r, recordingRule) group := rules.NewGroup("grp", "/path/to/file", time.Second, r, false, opts) - return []thanosrule.Group{{Group: group}} + return []thanosrule.Group{thanosrule.Group{Group: group}} } func (m rulesRetrieverMock) AlertingRules() []thanosrule.AlertingRule { + var ars []thanosrule.AlertingRule + for _, ar := range alertingRules() { + ars = append(ars, thanosrule.AlertingRule{AlertingRule: ar}) + } + return ars +} + +func alertingRules() []*rules.AlertingRule { expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) if err != nil { - m.testing.Fatalf("unable to parse alert expression: %s", err) + panic(fmt.Sprintf("unable to parse alert expression: %s", err)) } expr2, err := promql.ParseExpr(`up == 1`) if err != nil { - m.testing.Fatalf("Unable to parse alert expression: %s", err) + panic(fmt.Sprintf("unable to parse alert expression: %s", err)) } - rule1 := rules.NewAlertingRule( - "test_metric3", - expr1, - time.Second, - labels.Labels{}, - labels.Labels{}, - labels.Labels{}, - true, - log.NewNopLogger(), - ) - rule2 := rules.NewAlertingRule( - "test_metric4", - expr2, - time.Second, - labels.Labels{}, - labels.Labels{}, - labels.Labels{}, - true, - log.NewNopLogger(), - ) - var r []thanosrule.AlertingRule - r = append(r, thanosrule.AlertingRule{AlertingRule: rule1}) - r = append(r, thanosrule.AlertingRule{AlertingRule: rule2}) - return r + return []*rules.AlertingRule{ + rules.NewAlertingRule( + "test_metric3", + expr1, + time.Second, + labels.Labels{}, + labels.Labels{}, + labels.Labels{}, + true, + log.NewNopLogger(), + ), + rules.NewAlertingRule( + "test_metric4", + expr2, + time.Second, + labels.Labels{}, + labels.Labels{}, + labels.Labels{}, + true, + log.NewNopLogger(), + ), + } } func TestEndpoints(t *testing.T) { @@ -171,16 +174,17 @@ func TestEndpoints(t *testing.T) { } func testEndpoints(t *testing.T, api *API) { - type test struct { - endpoint qapi.ApiFunc - params map[string]string - query url.Values - response interface{} + endpointFn qapi.ApiFunc + endpointName string + params map[string]string + query url.Values + response interface{} } var tests = []test{ { - endpoint: api.rules, + endpointFn: api.rules, + endpointName: "rules", response: &RuleDiscovery{ RuleGroups: []*RuleGroup{ { @@ -232,27 +236,28 @@ func testEndpoints(t *testing.T, api *API) { request := func(m string, q url.Values) (*http.Request, error) { return http.NewRequest(m, fmt.Sprintf("http://example.com?%s", q.Encode()), nil) } - for i, test := range tests { - for _, method := range methods(test.endpoint) { - // Build a context with the correct request params. - ctx := context.Background() - for p, v := range test.params { - ctx = route.WithParam(ctx, p, v) - } - t.Logf("run %d\t%s\t%q", i, method, test.query.Encode()) + for _, test := range tests { + for _, method := range methods(test.endpointFn) { + t.Run(fmt.Sprintf("endpoint=%s/method=%s/query=%q", test.endpointName, method, test.query.Encode()), func(t *testing.T) { + // Build a context with the correct request params. + ctx := context.Background() + for p, v := range test.params { + ctx = route.WithParam(ctx, p, v) + } - req, err := request(method, test.query) - if err != nil { - t.Fatal(err) - } - endpoint, errors, apiError := test.endpoint(req.WithContext(ctx)) + req, err := request(method, test.query) + if err != nil { + t.Fatal(err) + } + endpoint, errors, apiError := test.endpointFn(req.WithContext(ctx)) - if errors != nil { - t.Fatalf("Unexpected errors: %s", errors) - return - } - assertAPIError(t, apiError) - assertAPIResponse(t, endpoint, test.response) + if errors != nil { + t.Fatalf("Unexpected errors: %s", errors) + return + } + assertAPIError(t, apiError) + assertAPIResponse(t, endpoint, test.response) + }) } } } diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go index 5156d24d91..5bad87e4c1 100644 --- a/pkg/rule/rule.go +++ b/pkg/rule/rule.go @@ -112,8 +112,8 @@ func (r RuleGroup) MarshalYAML() (interface{}, error) { // special field in RuleGroup file. func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []string) error { var ( - errs = tsdberrors.MultiError{} - filesMap = map[storepb.PartialResponseStrategy][]string{} + errs = tsdberrors.MultiError{} + filesByStrategy = map[storepb.PartialResponseStrategy][]string{} ) if err := os.RemoveAll(path.Join(dataDir, tmpRuleDir)); err != nil { @@ -138,19 +138,19 @@ func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []st // NOTE: This is very ugly, but we need to reparse it into tmp dir without the field to have to reuse // rules.Manager. The problem is that it uses yaml.UnmarshalStrict for some reasons. - mapped := map[storepb.PartialResponseStrategy]*rulefmt.RuleGroups{} + groupsByStrategy := map[storepb.PartialResponseStrategy]*rulefmt.RuleGroups{} for _, rg := range rg.Groups { - if _, ok := mapped[*rg.PartialResponseStrategy]; !ok { - mapped[*rg.PartialResponseStrategy] = &rulefmt.RuleGroups{} + if _, ok := groupsByStrategy[*rg.PartialResponseStrategy]; !ok { + groupsByStrategy[*rg.PartialResponseStrategy] = &rulefmt.RuleGroups{} } - mapped[*rg.PartialResponseStrategy].Groups = append( - mapped[*rg.PartialResponseStrategy].Groups, + groupsByStrategy[*rg.PartialResponseStrategy].Groups = append( + groupsByStrategy[*rg.PartialResponseStrategy].Groups, rg.RuleGroup, ) } - for s, rg := range mapped { + for s, rg := range groupsByStrategy { b, err := yaml.Marshal(rg) if err != nil { errs = append(errs, err) @@ -163,20 +163,19 @@ func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []st continue } - filesMap[s] = append(filesMap[s], newFn) + filesByStrategy[s] = append(filesByStrategy[s], newFn) } - } - for s, fs := range filesMap { - updater, ok := (*m)[s] + for s, fs := range filesByStrategy { + mgr, ok := (*m)[s] if !ok { errs = append(errs, errors.Errorf("no updater found for %v", s)) continue } // We add external labels in `pkg/alert.Queue`. // TODO(bwplotka): Investigate if we should put ext labels here or not. - if err := updater.Update(evalInterval, fs, nil); err != nil { + if err := mgr.Update(evalInterval, fs, nil); err != nil { errs = append(errs, err) continue } From dfee00c47d278a20acdc4aaf978ab6be9fc6b8cb Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Fri, 22 Nov 2019 09:29:10 +0100 Subject: [PATCH 3/3] Address Bartek's comments Signed-off-by: Simon Pasquier --- CHANGELOG.md | 2 +- pkg/rule/api/v1_test.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68b9abfb83..8b94a5c7b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1669](https://github.com/thanos-io/thanos/pull/1669) Fixed store sharding. Now it does not load excluded meta.jsons and load/fetch index-cache.json files. - [#1670](https://github.com/thanos-io/thanos/pull/1670) Fixed un-ordered blocks upload. Sidecar now uploads the oldest blocks first. - [#1568](https://github.com/thanos-io/thanos/pull/1709) Thanos Store now retains the first raw value of a chunk during downsampling to avoid losing some counter resets that occur on an aggregation boundary. -- [#1773](https://github.com/thanos-io/thanos/pull/1773) Thanos Ruler: fixed the /api/v1/rules endpoint. +- [#1773](https://github.com/thanos-io/thanos/pull/1773) Thanos Ruler: fixed the /api/v1/rules endpoint that returned 500 status code with `failed to assert type of rule ...` message. ### Changed diff --git a/pkg/rule/api/v1_test.go b/pkg/rule/api/v1_test.go index a2affc8d0a..d949441740 100644 --- a/pkg/rule/api/v1_test.go +++ b/pkg/rule/api/v1_test.go @@ -81,7 +81,7 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { } var r []rules.Rule - for _, ar := range alertingRules() { + for _, ar := range alertingRules(m.testing) { r = append(r, ar) } @@ -98,20 +98,20 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { func (m rulesRetrieverMock) AlertingRules() []thanosrule.AlertingRule { var ars []thanosrule.AlertingRule - for _, ar := range alertingRules() { + for _, ar := range alertingRules(m.testing) { ars = append(ars, thanosrule.AlertingRule{AlertingRule: ar}) } return ars } -func alertingRules() []*rules.AlertingRule { +func alertingRules(t *testing.T) []*rules.AlertingRule { expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) if err != nil { - panic(fmt.Sprintf("unable to parse alert expression: %s", err)) + t.Fatalf("unable to parse alert expression: %s", err) } expr2, err := promql.ParseExpr(`up == 1`) if err != nil { - panic(fmt.Sprintf("unable to parse alert expression: %s", err)) + t.Fatalf("unable to parse alert expression: %s", err) } return []*rules.AlertingRule{