From 338bc863a480f37c7665c23322e87a3e4cae19fc Mon Sep 17 00:00:00 2001 From: Yuri Shkuro Date: Sun, 18 Feb 2024 17:05:41 -0500 Subject: [PATCH 1/2] Return more detailed errors from ES storage Signed-off-by: Yuri Shkuro --- pkg/es/errors.go | 33 +++++++++++++++++ pkg/es/errors_test.go | 35 +++++++++++++++++++ plugin/storage/es/spanstore/reader.go | 7 ++-- .../storage/es/spanstore/service_operation.go | 4 +-- 4 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 pkg/es/errors.go create mode 100644 pkg/es/errors_test.go diff --git a/pkg/es/errors.go b/pkg/es/errors.go new file mode 100644 index 00000000000..d29c0e157e7 --- /dev/null +++ b/pkg/es/errors.go @@ -0,0 +1,33 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package es + +import ( + "errors" + "fmt" + + "github.com/olivere/elastic" +) + +// DetailedError creates a more detailed error if the error stack contains elastic.Error. +// This is useful because by default olivere/elastic returns errors that print like this: +// +// elastic: Error 400 (Bad Request): all shards failed [type=search_phase_execution_exception] +// +// This is pretty useless because it masks the underlying root cause. +// DetailedError would instead return an error like this: +// +// : RootCause[... detailed error message ...] +func DetailedError(err error) error { + var esErr *elastic.Error + if errors.As(err, &esErr) { + if esErr.Details != nil && len(esErr.Details.RootCause) > 0 { + rc := esErr.Details.RootCause[0] + if rc != nil { + return fmt.Errorf("%w: RootCause[%s [type=%s]]", err, rc.Reason, rc.Type) + } + } + } + return err +} diff --git a/pkg/es/errors_test.go b/pkg/es/errors_test.go new file mode 100644 index 00000000000..d05398c62b1 --- /dev/null +++ b/pkg/es/errors_test.go @@ -0,0 +1,35 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package es + +import ( + "fmt" + "testing" + + "github.com/olivere/elastic" + "github.com/stretchr/testify/assert" +) + +func TestDetailedError(t *testing.T) { + assert.ErrorContains(t, fmt.Errorf("some err"), "some err", "no panic") + + esErr := &elastic.Error{ + Status: 400, + Details: &elastic.ErrorDetails{ + Type: "type1", + Reason: "useless reason, e.g. all shards failed", + RootCause: []*elastic.ErrorDetails{ + { + Type: "type2", + Reason: "actual reason", + }, + }, + }, + } + assert.ErrorContains(t, DetailedError(esErr), "actual reason") + + esErr.Details.RootCause[0] = nil + assert.ErrorContains(t, DetailedError(esErr), "useless reason") + assert.NotContains(t, DetailedError(esErr).Error(), "actual reason") +} diff --git a/plugin/storage/es/spanstore/reader.go b/plugin/storage/es/spanstore/reader.go index 1fdce799bc0..1285c399529 100644 --- a/plugin/storage/es/spanstore/reader.go +++ b/plugin/storage/es/spanstore/reader.go @@ -246,7 +246,7 @@ func (s *SpanReader) GetTrace(ctx context.Context, traceID model.TraceID) (*mode currentTime := time.Now() traces, err := s.multiRead(ctx, []model.TraceID{traceID}, currentTime.Add(-s.maxSpanAge), currentTime) if err != nil { - return nil, err + return nil, es.DetailedError(err) } if len(traces) == 0 { return nil, spanstore.ErrTraceNotFound @@ -337,7 +337,7 @@ func (s *SpanReader) FindTraces(ctx context.Context, traceQuery *spanstore.Trace uniqueTraceIDs, err := s.FindTraceIDs(ctx, traceQuery) if err != nil { - return nil, err + return nil, es.DetailedError(err) } return s.multiRead(ctx, uniqueTraceIDs, traceQuery.StartTimeMin, traceQuery.StartTimeMax) } @@ -412,6 +412,7 @@ func (s *SpanReader) multiRead(ctx context.Context, traceIDs []model.TraceID, st traceIDs = nil results, err := s.client().MultiSearch().Add(searchRequests...).Index(indices...).Do(ctx) if err != nil { + err = es.DetailedError(err) logErrorToSpan(childSpan, err) return nil, err } @@ -426,6 +427,7 @@ func (s *SpanReader) multiRead(ctx context.Context, traceIDs []model.TraceID, st } spans, err := s.collectSpans(result.Hits.Hits) if err != nil { + err = es.DetailedError(err) logErrorToSpan(childSpan, err) return nil, err } @@ -580,6 +582,7 @@ func (s *SpanReader) findTraceIDs(ctx context.Context, traceQuery *spanstore.Tra searchResult, err := searchService.Do(ctx) if err != nil { + err = es.DetailedError(err) s.logger.Info("es search services failed", zap.Any("traceQuery", traceQuery), zap.Error(err)) return nil, fmt.Errorf("search services failed: %w", err) } diff --git a/plugin/storage/es/spanstore/service_operation.go b/plugin/storage/es/spanstore/service_operation.go index d2303196e6c..a6c320b3f1c 100644 --- a/plugin/storage/es/spanstore/service_operation.go +++ b/plugin/storage/es/spanstore/service_operation.go @@ -87,7 +87,7 @@ func (s *ServiceOperationStorage) getServices(context context.Context, indices [ searchResult, err := searchService.Do(context) if err != nil { - return nil, fmt.Errorf("search services failed: %w", err) + return nil, fmt.Errorf("search services failed: %w", es.DetailedError(err)) } if searchResult.Aggregations == nil { return []string{}, nil @@ -118,7 +118,7 @@ func (s *ServiceOperationStorage) getOperations(context context.Context, indices searchResult, err := searchService.Do(context) if err != nil { - return nil, fmt.Errorf("search operations failed: %w", err) + return nil, fmt.Errorf("search operations failed: %w", es.DetailedError(err)) } if searchResult.Aggregations == nil { return []string{}, nil From 864eb7064b0649438e349732842db7862b593240 Mon Sep 17 00:00:00 2001 From: Yuri Shkuro Date: Sun, 18 Feb 2024 17:23:28 -0500 Subject: [PATCH 2/2] s/assert/require Signed-off-by: Yuri Shkuro --- pkg/es/errors_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/es/errors_test.go b/pkg/es/errors_test.go index d05398c62b1..e57ec1403b2 100644 --- a/pkg/es/errors_test.go +++ b/pkg/es/errors_test.go @@ -8,11 +8,11 @@ import ( "testing" "github.com/olivere/elastic" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestDetailedError(t *testing.T) { - assert.ErrorContains(t, fmt.Errorf("some err"), "some err", "no panic") + require.ErrorContains(t, fmt.Errorf("some err"), "some err", "no panic") esErr := &elastic.Error{ Status: 400, @@ -27,9 +27,9 @@ func TestDetailedError(t *testing.T) { }, }, } - assert.ErrorContains(t, DetailedError(esErr), "actual reason") + require.ErrorContains(t, DetailedError(esErr), "actual reason") esErr.Details.RootCause[0] = nil - assert.ErrorContains(t, DetailedError(esErr), "useless reason") - assert.NotContains(t, DetailedError(esErr).Error(), "actual reason") + require.ErrorContains(t, DetailedError(esErr), "useless reason") + require.NotContains(t, DetailedError(esErr).Error(), "actual reason") }