diff --git a/CHANGELOG.md b/CHANGELOG.md index c24b3c09a8..5ca93d7d41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1669](https://github.com/thanos-io/thanos/pull/1669) Fixed store sharding. Now it does not load excluded meta.jsons and load/fetch index-cache.json files. - [#1670](https://github.com/thanos-io/thanos/pull/1670) Fixed un-ordered blocks upload. Sidecar now uploads the oldest blocks first. - [#1568](https://github.com/thanos-io/thanos/pull/1709) Thanos Store now retains the first raw value of a chunk during downsampling to avoid losing some counter resets that occur on an aggregation boundary. +- [#1773](https://github.com/thanos-io/thanos/pull/1773) Thanos Ruler: fixed the /api/v1/rules endpoint that returned 500 status code with `failed to assert type of rule ...` message. - [#1770](https://github.com/thanos-io/thanos/pull/1770) Fix `--web.external-prefix` 404s for static resources. ### Changed diff --git a/pkg/rule/api/v1.go b/pkg/rule/api/v1.go index 9b6d41a030..f49eb72291 100644 --- a/pkg/rule/api/v1.go +++ b/pkg/rule/api/v1.go @@ -84,7 +84,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { } switch rule := r.(type) { - case thanosrule.AlertingRule: + case *rules.AlertingRule: enrichedRule = alertingRule{ Name: rule.Name(), Query: rule.Query().String(), @@ -95,7 +95,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { Health: rule.Health(), LastError: lastError, Type: "alerting", - PartialResponseStrategy: rule.PartialResponseStrategy.String(), + PartialResponseStrategy: grp.PartialResponseStrategy.String(), } case *rules.RecordingRule: enrichedRule = recordingRule{ @@ -107,7 +107,7 @@ func (api *API) rules(r *http.Request) (interface{}, []error, *qapi.ApiError) { Type: "recording", } default: - err := fmt.Errorf("failed to assert type of rule '%v'", rule.Name()) + err := fmt.Errorf("rule %q: unsupported type %T", r.Name(), rule) return nil, nil, &qapi.ApiError{Typ: qapi.ErrorInternal, Err: err} } diff --git a/pkg/rule/api/v1_test.go b/pkg/rule/api/v1_test.go index 96a3a348fb..d949441740 100644 --- a/pkg/rule/api/v1_test.go +++ b/pkg/rule/api/v1_test.go @@ -62,8 +62,6 @@ type rulesRetrieverMock struct { } func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { - var ar rulesRetrieverMock - arules := ar.AlertingRules() storage := newStorage(m.testing) engineOpts := promql.EngineOpts{ @@ -83,9 +81,8 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { } var r []rules.Rule - - for _, alertrule := range arules { - r = append(r, alertrule) + for _, ar := range alertingRules(m.testing) { + r = append(r, ar) } recordingExpr, err := promql.ParseExpr(`vector(1)`) @@ -96,43 +93,49 @@ func (m rulesRetrieverMock) RuleGroups() []thanosrule.Group { r = append(r, recordingRule) group := rules.NewGroup("grp", "/path/to/file", time.Second, r, false, opts) - return []thanosrule.Group{{Group: group}} + return []thanosrule.Group{thanosrule.Group{Group: group}} } func (m rulesRetrieverMock) AlertingRules() []thanosrule.AlertingRule { + var ars []thanosrule.AlertingRule + for _, ar := range alertingRules(m.testing) { + ars = append(ars, thanosrule.AlertingRule{AlertingRule: ar}) + } + return ars +} + +func alertingRules(t *testing.T) []*rules.AlertingRule { expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) if err != nil { - m.testing.Fatalf("unable to parse alert expression: %s", err) + t.Fatalf("unable to parse alert expression: %s", err) } expr2, err := promql.ParseExpr(`up == 1`) if err != nil { - m.testing.Fatalf("Unable to parse alert expression: %s", err) + t.Fatalf("unable to parse alert expression: %s", err) } - rule1 := rules.NewAlertingRule( - "test_metric3", - expr1, - time.Second, - labels.Labels{}, - labels.Labels{}, - labels.Labels{}, - true, - log.NewNopLogger(), - ) - rule2 := rules.NewAlertingRule( - "test_metric4", - expr2, - time.Second, - labels.Labels{}, - labels.Labels{}, - labels.Labels{}, - true, - log.NewNopLogger(), - ) - var r []thanosrule.AlertingRule - r = append(r, thanosrule.AlertingRule{AlertingRule: rule1}) - r = append(r, thanosrule.AlertingRule{AlertingRule: rule2}) - return r + return []*rules.AlertingRule{ + rules.NewAlertingRule( + "test_metric3", + expr1, + time.Second, + labels.Labels{}, + labels.Labels{}, + labels.Labels{}, + true, + log.NewNopLogger(), + ), + rules.NewAlertingRule( + "test_metric4", + expr2, + time.Second, + labels.Labels{}, + labels.Labels{}, + labels.Labels{}, + true, + log.NewNopLogger(), + ), + } } func TestEndpoints(t *testing.T) { @@ -171,16 +174,17 @@ func TestEndpoints(t *testing.T) { } func testEndpoints(t *testing.T, api *API) { - type test struct { - endpoint qapi.ApiFunc - params map[string]string - query url.Values - response interface{} + endpointFn qapi.ApiFunc + endpointName string + params map[string]string + query url.Values + response interface{} } var tests = []test{ { - endpoint: api.rules, + endpointFn: api.rules, + endpointName: "rules", response: &RuleDiscovery{ RuleGroups: []*RuleGroup{ { @@ -232,27 +236,28 @@ func testEndpoints(t *testing.T, api *API) { request := func(m string, q url.Values) (*http.Request, error) { return http.NewRequest(m, fmt.Sprintf("http://example.com?%s", q.Encode()), nil) } - for i, test := range tests { - for _, method := range methods(test.endpoint) { - // Build a context with the correct request params. - ctx := context.Background() - for p, v := range test.params { - ctx = route.WithParam(ctx, p, v) - } - t.Logf("run %d\t%s\t%q", i, method, test.query.Encode()) + for _, test := range tests { + for _, method := range methods(test.endpointFn) { + t.Run(fmt.Sprintf("endpoint=%s/method=%s/query=%q", test.endpointName, method, test.query.Encode()), func(t *testing.T) { + // Build a context with the correct request params. + ctx := context.Background() + for p, v := range test.params { + ctx = route.WithParam(ctx, p, v) + } - req, err := request(method, test.query) - if err != nil { - t.Fatal(err) - } - endpoint, errors, apiError := test.endpoint(req.WithContext(ctx)) + req, err := request(method, test.query) + if err != nil { + t.Fatal(err) + } + endpoint, errors, apiError := test.endpointFn(req.WithContext(ctx)) - if errors != nil { - t.Fatalf("Unexpected errors: %s", errors) - return - } - assertAPIError(t, apiError) - assertAPIResponse(t, endpoint, test.response) + if errors != nil { + t.Fatalf("Unexpected errors: %s", errors) + return + } + assertAPIError(t, apiError) + assertAPIResponse(t, endpoint, test.response) + }) } } } diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go index 5156d24d91..5bad87e4c1 100644 --- a/pkg/rule/rule.go +++ b/pkg/rule/rule.go @@ -112,8 +112,8 @@ func (r RuleGroup) MarshalYAML() (interface{}, error) { // special field in RuleGroup file. func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []string) error { var ( - errs = tsdberrors.MultiError{} - filesMap = map[storepb.PartialResponseStrategy][]string{} + errs = tsdberrors.MultiError{} + filesByStrategy = map[storepb.PartialResponseStrategy][]string{} ) if err := os.RemoveAll(path.Join(dataDir, tmpRuleDir)); err != nil { @@ -138,19 +138,19 @@ func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []st // NOTE: This is very ugly, but we need to reparse it into tmp dir without the field to have to reuse // rules.Manager. The problem is that it uses yaml.UnmarshalStrict for some reasons. - mapped := map[storepb.PartialResponseStrategy]*rulefmt.RuleGroups{} + groupsByStrategy := map[storepb.PartialResponseStrategy]*rulefmt.RuleGroups{} for _, rg := range rg.Groups { - if _, ok := mapped[*rg.PartialResponseStrategy]; !ok { - mapped[*rg.PartialResponseStrategy] = &rulefmt.RuleGroups{} + if _, ok := groupsByStrategy[*rg.PartialResponseStrategy]; !ok { + groupsByStrategy[*rg.PartialResponseStrategy] = &rulefmt.RuleGroups{} } - mapped[*rg.PartialResponseStrategy].Groups = append( - mapped[*rg.PartialResponseStrategy].Groups, + groupsByStrategy[*rg.PartialResponseStrategy].Groups = append( + groupsByStrategy[*rg.PartialResponseStrategy].Groups, rg.RuleGroup, ) } - for s, rg := range mapped { + for s, rg := range groupsByStrategy { b, err := yaml.Marshal(rg) if err != nil { errs = append(errs, err) @@ -163,20 +163,19 @@ func (m *Managers) Update(dataDir string, evalInterval time.Duration, files []st continue } - filesMap[s] = append(filesMap[s], newFn) + filesByStrategy[s] = append(filesByStrategy[s], newFn) } - } - for s, fs := range filesMap { - updater, ok := (*m)[s] + for s, fs := range filesByStrategy { + mgr, ok := (*m)[s] if !ok { errs = append(errs, errors.Errorf("no updater found for %v", s)) continue } // We add external labels in `pkg/alert.Queue`. // TODO(bwplotka): Investigate if we should put ext labels here or not. - if err := updater.Update(evalInterval, fs, nil); err != nil { + if err := mgr.Update(evalInterval, fs, nil); err != nil { errs = append(errs, err) continue } diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index d8c840c82c..272a3ed98a 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -189,7 +189,7 @@ func TestRule(t *testing.T) { return nil })) - // checks counter ensures we are not missing metrics. + // The checks counter ensures that we are not missing metrics. checks := 0 // Check metrics to make sure we report correct ones that allow handling the AlwaysFiring not being triggered because of query issue. testutil.Ok(t, promclient.MetricValues(ctx, nil, urlParse(t, r1.HTTP.URL()), func(lset labels.Labels, val float64) error { @@ -204,6 +204,15 @@ func TestRule(t *testing.T) { return nil })) testutil.Equals(t, 2, checks) + + // Verify API endpoints. + for _, endpoint := range []string{"/api/v1/rules", "/api/v1/alerts"} { + for _, r := range []*serverScheduler{r1, r2} { + code, _, err := getAPIEndpoint(ctx, r.HTTP.URL()+endpoint) + testutil.Ok(t, err) + testutil.Equals(t, 200, code) + } + } } type failingStoreAPI struct{} @@ -438,27 +447,17 @@ func TestRulePartialResponse(t *testing.T) { // TODO(bwplotka): Move to promclient. func queryAlertmanagerAlerts(ctx context.Context, url string) ([]*model.Alert, error) { - req, err := http.NewRequest("GET", url+"/api/v1/alerts", nil) + code, body, err := getAPIEndpoint(ctx, url+"/api/v1/alerts") if err != nil { return nil, err } - req = req.WithContext(ctx) - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err + if code != 200 { + return nil, errors.Errorf("expected 200 response, got %d", code) } - defer runutil.CloseWithLogOnErr(nil, resp.Body, "close body query alertmanager") var v struct { Data []*model.Alert `json:"data"` } - - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return nil, err - } - if err = json.Unmarshal(body, &v); err != nil { return nil, err } @@ -468,3 +467,22 @@ func queryAlertmanagerAlerts(ctx context.Context, url string) ([]*model.Alert, e }) return v.Data, nil } + +func getAPIEndpoint(ctx context.Context, url string) (int, []byte, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return 0, nil, err + } + req = req.WithContext(ctx) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return 0, nil, err + } + defer runutil.CloseWithLogOnErr(nil, resp.Body, "%s: close body", req.URL.String()) + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return 0, nil, err + } + return resp.StatusCode, body, nil +}