kvserver: improve logging for follower reads #51523

Merged · 2 commits · Jul 24, 2020
Changes from all commits
84 changes: 46 additions & 38 deletions pkg/kv/kvserver/replica_follower_read.go
@@ -39,61 +39,69 @@ var FollowerReadsEnabled = settings.RegisterPublicBoolSetting(
 func (r *Replica) canServeFollowerRead(
 	ctx context.Context, ba *roachpb.BatchRequest, pErr *roachpb.Error,
 ) *roachpb.Error {
-	canServeFollowerRead := false
-	if lErr, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); ok &&
+	lErr, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
+	eligible := ok &&
 		lErr.LeaseHolder != nil && lErr.Lease.Type() == roachpb.LeaseEpoch &&
 		(!ba.IsLocking() && ba.IsAllTransactional()) && // followerreadsccl.batchCanBeEvaluatedOnFollower
 		(ba.Txn == nil || !ba.Txn.IsLocking()) && // followerreadsccl.txnCanPerformFollowerRead
-		FollowerReadsEnabled.Get(&r.store.cfg.Settings.SV) {
+		FollowerReadsEnabled.Get(&r.store.cfg.Settings.SV)
 
-		// There's no known reason that a non-VOTER_FULL replica couldn't serve follower
-		// reads (or RangeFeed), but as of the time of writing, these are expected
-		// to be short-lived, so it's not worth working out the edge-cases. Revisit if
-		// we add long-lived learners or feel that incoming/outgoing voters also need
-		// to be able to serve follower reads.
-		repDesc, err := r.GetReplicaDescriptor()
-		if err != nil {
-			return roachpb.NewError(err)
-		}
-		if typ := repDesc.GetType(); typ != roachpb.VOTER_FULL {
-			log.Eventf(ctx, "%s replicas cannot serve follower reads", typ)
-			return pErr
-		}
+	if !eligible {
+		// We couldn't do anything with the error, propagate it.
+		return pErr
+	}
 
-		ts := ba.Timestamp
-		if ba.Txn != nil {
-			ts.Forward(ba.Txn.MaxTimestamp)
-		}
+	// There's no known reason that a non-VOTER_FULL replica couldn't serve follower
+	// reads (or RangeFeed), but as of the time of writing, these are expected
+	// to be short-lived, so it's not worth working out the edge-cases. Revisit if
+	// we add long-lived learners or feel that incoming/outgoing voters also need
+	// to be able to serve follower reads.
+	repDesc, err := r.GetReplicaDescriptor()
+	if err != nil {
+		return roachpb.NewError(err)
+	}
+	if typ := repDesc.GetType(); typ != roachpb.VOTER_FULL {
+		log.Eventf(ctx, "%s replicas cannot serve follower reads", typ)
+		return pErr
+	}
+
+	ts := ba.Timestamp
+	if ba.Txn != nil {
+		ts.Forward(ba.Txn.MaxTimestamp)
+	}
 
-		maxClosed, _ := r.maxClosed(ctx)
-		canServeFollowerRead = ts.LessEq(maxClosed)
-		if !canServeFollowerRead {
-			// We can't actually serve the read based on the closed timestamp.
-			// Signal the clients that we want an update so that future requests can succeed.
-			r.store.cfg.ClosedTimestamp.Clients.Request(lErr.LeaseHolder.NodeID, r.RangeID)
-
-			if false {
-				// NB: this can't go behind V(x) because the log message created by the
-				// storage might be gigantic in real clusters, and we don't want to trip it
-				// using logspy.
-				log.Warningf(ctx, "can't serve follower read for %s at epo %d, storage is %s",
-					ba.Timestamp, lErr.Lease.Epoch,
-					r.store.cfg.ClosedTimestamp.Storage.(*ctstorage.MultiStorage).StringForNodes(lErr.LeaseHolder.NodeID),
-				)
-			}
-		}
-	}
+	maxClosed, _ := r.maxClosed(ctx)
+	canServeFollowerRead := ts.LessEq(maxClosed)
+	tsDiff := ts.GoTime().Sub(maxClosed.GoTime())
+	if !canServeFollowerRead {
+		maxTsStr := "n/a"
+		if ba.Txn != nil {
+			maxTsStr = ba.Txn.MaxTimestamp.String()
+		}
+
+		// We can't actually serve the read based on the closed timestamp.
+		// Signal the clients that we want an update so that future requests can succeed.
+		r.store.cfg.ClosedTimestamp.Clients.Request(lErr.LeaseHolder.NodeID, r.RangeID)
+		log.Eventf(ctx, "can't serve follower read; closed timestamp too low by: %s; maxClosed: %s ts: %s maxTS: %s",
+			tsDiff, maxClosed, ba.Timestamp, maxTsStr)
+
+		if false {
+			// NB: this can't go behind V(x) because the log message created by the
+			// storage might be gigantic in real clusters, and we don't want to trip it
+			// using logspy.
+			log.Warningf(ctx, "can't serve follower read for %s at epo %d, storage is %s",
+				ba.Timestamp, lErr.Lease.Epoch,
+				r.store.cfg.ClosedTimestamp.Storage.(*ctstorage.MultiStorage).StringForNodes(lErr.LeaseHolder.NodeID),
+			)
+		}
+	}
 
 	if !canServeFollowerRead {
 		// We couldn't do anything with the error, propagate it.
 		return pErr
 	}
 
 	// This replica can serve this read!
 	//
 	// TODO(tschottdorf): once a read for a timestamp T has been served, the replica may
 	// serve reads for that and smaller timestamps forever.
-	log.Event(ctx, "serving via follower read")
+	log.Eventf(ctx, "serving via follower read; query timestamp below closed timestamp by %s", -tsDiff)
 	r.store.metrics.FollowerReadsCount.Inc(1)
 	return nil
 }
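
For context on the quantities the two new trace messages report, here is a minimal, standalone sketch (not CockroachDB code): plain time.Time stands in for hlc.Timestamp, and canServeAt is a made-up name. It mirrors the ts.LessEq(maxClosed) check and the tsDiff computed in the patch: a positive lag means the closed timestamp is too low to serve the read, a negative lag means the query timestamp is already below the closed timestamp.

package main

import (
	"fmt"
	"time"
)

// canServeAt reports whether a follower whose closed timestamp is maxClosed
// can serve a read at ts, plus the signed lag between the two: positive when
// the closed timestamp is too low, negative when the read is safely below it.
func canServeAt(ts, maxClosed time.Time) (bool, time.Duration) {
	tsDiff := ts.Sub(maxClosed)         // mirrors ts.GoTime().Sub(maxClosed.GoTime())
	return !ts.After(maxClosed), tsDiff // !After == LessEq for time.Time
}

func main() {
	now := time.Now()
	maxClosed := now.Add(-10 * time.Second) // follower's closed timestamp trails by 10s

	// A read 30s in the past is below the closed timestamp and can be served.
	if ok, lag := canServeAt(now.Add(-30*time.Second), maxClosed); ok {
		fmt.Printf("serving via follower read; query timestamp below closed timestamp by %s\n", -lag)
	}

	// A read at the current time is ahead of the closed timestamp and cannot be served.
	if ok, lag := canServeAt(now, maxClosed); !ok {
		fmt.Printf("can't serve follower read; closed timestamp too low by: %s\n", lag)
	}
}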