-
Notifications
You must be signed in to change notification settings - Fork 9.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
etcdserver: initial read index implementation #6212
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
package etcdserver | ||
|
||
import ( | ||
"bytes" | ||
"encoding/binary" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
@@ -26,6 +28,9 @@ import ( | |
"github.com/coreos/etcd/lease/leasehttp" | ||
"github.com/coreos/etcd/lease/leasepb" | ||
"github.com/coreos/etcd/mvcc" | ||
"github.com/coreos/etcd/raft" | ||
|
||
"github.com/coreos/go-semver/semver" | ||
"golang.org/x/net/context" | ||
"google.golang.org/grpc/metadata" | ||
) | ||
|
@@ -44,6 +49,10 @@ const ( | |
maxGapBetweenApplyAndCommitIndex = 1000 | ||
) | ||
|
||
var ( | ||
newRangeClusterVersion = *semver.Must(semver.NewVersion("3.1.0")) | ||
) | ||
|
||
type RaftKV interface { | ||
Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) | ||
Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) | ||
|
@@ -86,6 +95,31 @@ type Authenticator interface { | |
} | ||
|
||
func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) { | ||
// TODO: remove this checking when we release etcd 3.2 | ||
if s.ClusterVersion() == nil || s.ClusterVersion().LessThan(newRangeClusterVersion) { | ||
return s.legacyRange(ctx, r) | ||
} | ||
|
||
if !r.Serializable { | ||
err := s.linearizableReadNotify(ctx) | ||
if err != nil { | ||
return nil, err | ||
} | ||
} | ||
var resp *pb.RangeResponse | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why shuffle this around? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nm, I see it falls through from linearized to serialized once it gets the notify |
||
var err error | ||
chk := func(ai *auth.AuthInfo) error { | ||
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd) | ||
} | ||
get := func() { resp, err = s.applyV3Base.Range(noTxn, r) } | ||
if serr := s.doSerialize(ctx, chk, get); serr != nil { | ||
return nil, serr | ||
} | ||
return resp, err | ||
} | ||
|
||
// TODO: remove this func when we release etcd 3.2 | ||
func (s *EtcdServer) legacyRange(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) { | ||
if r.Serializable { | ||
var resp *pb.RangeResponse | ||
var err error | ||
|
@@ -143,6 +177,7 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse | |
} | ||
return resp, err | ||
} | ||
// TODO: readonly Txn do not need to go through raft | ||
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r}) | ||
if err != nil { | ||
return nil, err | ||
|
@@ -641,3 +676,95 @@ func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.Intern | |
|
||
// Watchable returns a watchable interface attached to the etcdserver. | ||
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() } | ||
|
||
func (s *EtcdServer) linearizableReadLoop() { | ||
var rs raft.ReadState | ||
internalTimeout := time.Second | ||
|
||
for { | ||
ctx := make([]byte, 8) | ||
binary.BigEndian.PutUint64(ctx, s.reqIDGen.Next()) | ||
|
||
select { | ||
case <-s.readwaitc: | ||
case <-s.stopping: | ||
return | ||
} | ||
|
||
nextnr := newNotifier() | ||
|
||
s.readMu.Lock() | ||
nr := s.readNotifier | ||
s.readNotifier = nextnr | ||
s.readMu.Unlock() | ||
|
||
cctx, cancel := context.WithTimeout(context.Background(), internalTimeout) | ||
if err := s.r.ReadIndex(cctx, ctx); err != nil { | ||
cancel() | ||
if err == raft.ErrStopped { | ||
return | ||
} | ||
plog.Errorf("failed to get read index from raft: %v", err) | ||
nr.notify(err) | ||
continue | ||
} | ||
cancel() | ||
|
||
var ( | ||
timeout bool | ||
done bool | ||
) | ||
for !timeout && !done { | ||
select { | ||
case rs = <-s.r.readStateC: | ||
done = bytes.Equal(rs.RequestCtx, ctx) | ||
if !done { | ||
// a previous request might time out. now we should ignore the response of it and | ||
// continue waiting for the response of the current requests. | ||
plog.Warningf("ignored out-of-date read index response (want %v, got %v)", rs.RequestCtx, ctx) | ||
} | ||
case <-time.After(internalTimeout): | ||
plog.Warningf("timed out waiting for read index response") | ||
nr.notify(ErrTimeout) | ||
timeout = true | ||
case <-s.stopping: | ||
return | ||
} | ||
} | ||
if !done { | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what happens to |
||
} | ||
|
||
if ai := s.getAppliedIndex(); ai < rs.Index { | ||
select { | ||
case <-s.applyWait.Wait(rs.Index): | ||
case <-s.stopping: | ||
return | ||
} | ||
} | ||
// unblock all l-reads requested at indices before rs.Index | ||
nr.notify(nil) | ||
} | ||
} | ||
|
||
func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error { | ||
s.readMu.RLock() | ||
nc := s.readNotifier | ||
s.readMu.RUnlock() | ||
|
||
// signal linearizable loop for current notify if it hasn't been already | ||
select { | ||
case s.readwaitc <- struct{}{}: | ||
default: | ||
} | ||
|
||
// wait for read state notification | ||
select { | ||
case <-nc.c: | ||
return nc.err | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
case <-s.done: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not a routine etcd server created. This is a per request routine so I assume we should use s.done? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, it doesn't really make a difference |
||
return ErrStopped | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also support l-read
Txn
?