Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return more detailed errors from ES storage #5209

Merged
merged 2 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pkg/es/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package es

import (
"errors"
"fmt"

"github.com/olivere/elastic"
)

// DetailedError creates a more detailed error if the error stack contains elastic.Error.
// This is useful because by default olivere/elastic returns errors that print like this:
//
// elastic: Error 400 (Bad Request): all shards failed [type=search_phase_execution_exception]
//
// This is pretty useless because it masks the underlying root cause.
// DetailedError would instead return an error like this:
//
// <same as above>: RootCause[... detailed error message ...]
func DetailedError(err error) error {
var esErr *elastic.Error
if errors.As(err, &esErr) {
if esErr.Details != nil && len(esErr.Details.RootCause) > 0 {
rc := esErr.Details.RootCause[0]
if rc != nil {
return fmt.Errorf("%w: RootCause[%s [type=%s]]", err, rc.Reason, rc.Type)
}
}
}
return err
}
35 changes: 35 additions & 0 deletions pkg/es/errors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package es

import (
"fmt"
"testing"

"github.com/olivere/elastic"
"github.com/stretchr/testify/require"
)

func TestDetailedError(t *testing.T) {
require.ErrorContains(t, fmt.Errorf("some err"), "some err", "no panic")

esErr := &elastic.Error{
Status: 400,
Details: &elastic.ErrorDetails{
Type: "type1",
Reason: "useless reason, e.g. all shards failed",
RootCause: []*elastic.ErrorDetails{
{
Type: "type2",
Reason: "actual reason",
},
},
},
}
require.ErrorContains(t, DetailedError(esErr), "actual reason")

esErr.Details.RootCause[0] = nil
require.ErrorContains(t, DetailedError(esErr), "useless reason")
require.NotContains(t, DetailedError(esErr).Error(), "actual reason")
}
7 changes: 5 additions & 2 deletions plugin/storage/es/spanstore/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func (s *SpanReader) GetTrace(ctx context.Context, traceID model.TraceID) (*mode
currentTime := time.Now()
traces, err := s.multiRead(ctx, []model.TraceID{traceID}, currentTime.Add(-s.maxSpanAge), currentTime)
if err != nil {
return nil, err
return nil, es.DetailedError(err)
}
if len(traces) == 0 {
return nil, spanstore.ErrTraceNotFound
Expand Down Expand Up @@ -337,7 +337,7 @@ func (s *SpanReader) FindTraces(ctx context.Context, traceQuery *spanstore.Trace

uniqueTraceIDs, err := s.FindTraceIDs(ctx, traceQuery)
if err != nil {
return nil, err
return nil, es.DetailedError(err)
}
return s.multiRead(ctx, uniqueTraceIDs, traceQuery.StartTimeMin, traceQuery.StartTimeMax)
}
Expand Down Expand Up @@ -412,6 +412,7 @@ func (s *SpanReader) multiRead(ctx context.Context, traceIDs []model.TraceID, st
traceIDs = nil
results, err := s.client().MultiSearch().Add(searchRequests...).Index(indices...).Do(ctx)
if err != nil {
err = es.DetailedError(err)
logErrorToSpan(childSpan, err)
return nil, err
}
Expand All @@ -426,6 +427,7 @@ func (s *SpanReader) multiRead(ctx context.Context, traceIDs []model.TraceID, st
}
spans, err := s.collectSpans(result.Hits.Hits)
if err != nil {
err = es.DetailedError(err)
logErrorToSpan(childSpan, err)
return nil, err
}
Expand Down Expand Up @@ -580,6 +582,7 @@ func (s *SpanReader) findTraceIDs(ctx context.Context, traceQuery *spanstore.Tra

searchResult, err := searchService.Do(ctx)
if err != nil {
err = es.DetailedError(err)
s.logger.Info("es search services failed", zap.Any("traceQuery", traceQuery), zap.Error(err))
return nil, fmt.Errorf("search services failed: %w", err)
}
Expand Down
4 changes: 2 additions & 2 deletions plugin/storage/es/spanstore/service_operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (s *ServiceOperationStorage) getServices(context context.Context, indices [

searchResult, err := searchService.Do(context)
if err != nil {
return nil, fmt.Errorf("search services failed: %w", err)
return nil, fmt.Errorf("search services failed: %w", es.DetailedError(err))
}
if searchResult.Aggregations == nil {
return []string{}, nil
Expand Down Expand Up @@ -118,7 +118,7 @@ func (s *ServiceOperationStorage) getOperations(context context.Context, indices

searchResult, err := searchService.Do(context)
if err != nil {
return nil, fmt.Errorf("search operations failed: %w", err)
return nil, fmt.Errorf("search operations failed: %w", es.DetailedError(err))
}
if searchResult.Aggregations == nil {
return []string{}, nil
Expand Down
Loading