Skip to content

Commit

Permalink
kv: unwrap MixedSuccessError even when retry attempts exhausted
Browse files Browse the repository at this point in the history
Fixes #40328.
Closes #40332.

This commit fixes a bug where MixedSuccessErrors would not be unwrapped
by the txnSpanRefresher when a request had exhausted its refresh attempt
limit.

The theory is that the race test plus the background stats computation
in `TestParallel/create_stats` was causing a lot of transaction retries,
which exhausted `maxTxnRefreshAttempts`.

Release note: None
  • Loading branch information
nvanbenschoten committed Aug 29, 2019
1 parent 2a1cad6 commit b356962
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions pkg/kv/txn_interceptor_span_refresher.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,33 @@ func (sr *txnSpanRefresher) sendLockedWithRefreshAttempts(
ctx context.Context, ba roachpb.BatchRequest, maxRefreshAttempts int,
) (_ *roachpb.BatchResponse, _ *roachpb.Error, largestRefreshTS hlc.Timestamp) {
br, pErr := sr.wrapped.SendLocked(ctx, ba)
if pErr != nil && maxRefreshAttempts > 0 {
br, pErr, largestRefreshTS = sr.maybeRetrySend(ctx, ba, br, pErr, maxRefreshAttempts)
if pErr != nil {
if unwrappedErr, ok := maybeUnwrapMixedSuccessErr(pErr); ok {
log.VEventf(ctx, 2, "got partial success; cannot retry %s (pErr=%s)", ba, unwrappedErr)
return nil, unwrappedErr, hlc.Timestamp{}
}
if maxRefreshAttempts > 0 {
br, pErr, largestRefreshTS = sr.maybeRetrySend(ctx, ba, br, pErr, maxRefreshAttempts)
}
}
return br, pErr, largestRefreshTS
}

// maybeUnwrapMixedSuccessErr attempts to unwrap a mixed success error, and
// returns whether it was successful or not. With mixed success, we can't
// attempt a retry without potentially succeeding at the same conditional put or
// increment request twice; so we return the wrapped error instead. Because the
// dist sender splits up batches to send to multiple ranges in parallel, and
// then combines the results, partial success makes it very difficult to
// determine what can be retried.
func maybeUnwrapMixedSuccessErr(pErr *roachpb.Error) (*roachpb.Error, bool) {
if mse, ok := pErr.GetDetail().(*roachpb.MixedSuccessError); ok {
pErr.SetDetail(mse.GetWrapped())
return pErr, true
}
return pErr, false
}

// maybeRetrySend attempts to catch serializable errors and avoid them by
// refreshing the txn at a larger timestamp. If it succeeds at refreshing the
// txn timestamp, it recurses into sendLockedWithRefreshAttempts and retries the
Expand All @@ -213,18 +234,6 @@ func (sr *txnSpanRefresher) maybeRetrySend(
pErr *roachpb.Error,
maxRefreshAttempts int,
) (*roachpb.BatchResponse, *roachpb.Error, hlc.Timestamp) {
// With mixed success, we can't attempt a retry without potentially
// succeeding at the same conditional put or increment request
// twice; return the wrapped error instead. Because the dist sender
// splits up batches to send to multiple ranges in parallel, and
// then combines the results, partial success makes it very
// difficult to determine what can be retried.
if mse, ok := pErr.GetDetail().(*roachpb.MixedSuccessError); ok {
pErr.SetDetail(mse.GetWrapped())
log.VEventf(ctx, 2, "got partial success; cannot retry %s (pErr=%s)", ba, pErr)
return nil, pErr, hlc.Timestamp{}
}

// Check for an error which can be retried after updating spans.
canRetryTxn, retryTxn := roachpb.CanTransactionRetryAtRefreshedTimestamp(ctx, pErr)
if !canRetryTxn || !sr.canAutoRetry {
Expand Down

0 comments on commit b356962

Please sign in to comment.