From e67b9829b626b0d79ef687035fe0261ec15b118c Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Tue, 4 Jun 2019 19:30:57 -0700 Subject: [PATCH 1/2] *: enable lease checkpoint via experimental flag The primary lessor persists the lease remainingTTL only if the experimental flag "--experimental-enable-lease-checkpoint" is set. --- embed/config.go | 2 ++ embed/etcd.go | 1 + etcdmain/config.go | 1 + etcdmain/help.go | 2 ++ etcdserver/config.go | 2 ++ etcdserver/server.go | 9 ++++++--- integration/cluster.go | 4 ++++ integration/v3_lease_test.go | 6 +++++- 8 files changed, 23 insertions(+), 4 deletions(-) diff --git a/embed/config.go b/embed/config.go index 0480acae3a3..616e46cce90 100644 --- a/embed/config.go +++ b/embed/config.go @@ -278,6 +278,8 @@ type Config struct { ExperimentalEnableV2V3 string `json:"experimental-enable-v2v3"` // ExperimentalBackendFreelistType specifies the type of freelist that boltdb backend uses (array and map are supported types). ExperimentalBackendFreelistType string `json:"experimental-backend-bbolt-freelist-type"` + // ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. + ExperimentalEnableLeaseCheckpoint bool `json:"experimental-enable-lease-checkpoint"` // ForceNewCluster starts a new cluster even if previously started; unsafe. 
ForceNewCluster bool `json:"force-new-cluster"` diff --git a/embed/etcd.go b/embed/etcd.go index d32adbd1543..8fa48f41d11 100644 --- a/embed/etcd.go +++ b/embed/etcd.go @@ -204,6 +204,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { Debug: cfg.Debug, ForceNewCluster: cfg.ForceNewCluster, EnableGRPCGateway: cfg.EnableGRPCGateway, + EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, } print(e.cfg.logger, *cfg, srvcfg, memberInitialized) if e.Server, err = etcdserver.NewServer(srvcfg); err != nil { diff --git a/etcdmain/config.go b/etcdmain/config.go index 37a23a56e29..1b5c992ea0e 100644 --- a/etcdmain/config.go +++ b/etcdmain/config.go @@ -249,6 +249,7 @@ func newConfig() *config { fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, "Duration of time between cluster corruption check passes.") fs.StringVar(&cfg.ec.ExperimentalEnableV2V3, "experimental-enable-v2v3", cfg.ec.ExperimentalEnableV2V3, "v3 prefix for serving emulated v2 state.") fs.StringVar(&cfg.ec.ExperimentalBackendFreelistType, "experimental-backend-bbolt-freelist-type", cfg.ec.ExperimentalBackendFreelistType, "ExperimentalBackendFreelistType specifies the type of freelist that boltdb backend uses(array and map are supported types)") + fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable to persist lease remaining TTL to prevent indefinite auto-renewal of long lived leases.") // unsafe fs.BoolVar(&cfg.ec.ForceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster.") diff --git a/etcdmain/help.go b/etcdmain/help.go index c8db7d34128..7f022cda900 100644 --- a/etcdmain/help.go +++ b/etcdmain/help.go @@ -202,6 +202,8 @@ Experimental feature: Serve v2 requests through the v3 backend under a given prefix. 
--experimental-backend-bbolt-freelist-type ExperimentalBackendFreelistType specifies the type of freelist that boltdb backend uses(array and map are supported types). + --experimental-enable-lease-checkpoint + ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. Unsafe feature: --force-new-cluster 'false' diff --git a/etcdserver/config.go b/etcdserver/config.go index b0eaf18e7b9..9597c6cb8b2 100644 --- a/etcdserver/config.go +++ b/etcdserver/config.go @@ -150,6 +150,8 @@ type ServerConfig struct { ForceNewCluster bool + // EnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. + EnableLeaseCheckpoint bool // LeaseCheckpointInterval time.Duration is the wait duration between lease checkpoints. LeaseCheckpointInterval time.Duration diff --git a/etcdserver/server.go b/etcdserver/server.go index 722922eeff2..95563cba1e4 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -595,9 +595,12 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { return nil, err } - srv.lessor.SetCheckpointer(func(ctx context.Context, cp *pb.LeaseCheckpointRequest) { - srv.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseCheckpoint: cp}) - }) + if srv.Cfg.EnableLeaseCheckpoint { + // setting checkpointer enables lease checkpoint feature. + srv.lessor.SetCheckpointer(func(ctx context.Context, cp *pb.LeaseCheckpointRequest) { + srv.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseCheckpoint: cp}) + }) + } // TODO: move transport initialization near the definition of remote tr := &rafthttp.Transport{ diff --git a/integration/cluster.go b/integration/cluster.go index 73b32cae1cc..7c3e0701891 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -150,6 +150,7 @@ type ClusterConfig struct { // UseIP is true to use only IP for gRPC requests. 
UseIP bool + EnableLeaseCheckpoint bool LeaseCheckpointInterval time.Duration } @@ -293,6 +294,7 @@ func (c *cluster) mustNewMember(t testing.TB) *member { clientMaxCallSendMsgSize: c.cfg.ClientMaxCallSendMsgSize, clientMaxCallRecvMsgSize: c.cfg.ClientMaxCallRecvMsgSize, useIP: c.cfg.UseIP, + enableLeaseCheckpoint: c.cfg.EnableLeaseCheckpoint, leaseCheckpointInterval: c.cfg.LeaseCheckpointInterval, }) m.DiscoveryURL = c.cfg.DiscoveryURL @@ -581,6 +583,7 @@ type memberConfig struct { clientMaxCallSendMsgSize int clientMaxCallRecvMsgSize int useIP bool + enableLeaseCheckpoint bool leaseCheckpointInterval time.Duration } @@ -672,6 +675,7 @@ func mustNewMember(t testing.TB, mcfg memberConfig) *member { m.clientMaxCallSendMsgSize = mcfg.clientMaxCallSendMsgSize m.clientMaxCallRecvMsgSize = mcfg.clientMaxCallRecvMsgSize m.useIP = mcfg.useIP + m.EnableLeaseCheckpoint = mcfg.enableLeaseCheckpoint m.LeaseCheckpointInterval = mcfg.leaseCheckpointInterval m.InitialCorruptCheck = true diff --git a/integration/v3_lease_test.go b/integration/v3_lease_test.go index 7b378f0c92f..29e9d4c763b 100644 --- a/integration/v3_lease_test.go +++ b/integration/v3_lease_test.go @@ -230,7 +230,11 @@ func TestV3LeaseCheckpoint(t *testing.T) { var ttl int64 = 300 leaseInterval := 2 * time.Second defer testutil.AfterTest(t) - clus := NewClusterV3(t, &ClusterConfig{Size: 3, LeaseCheckpointInterval: leaseInterval}) + clus := NewClusterV3(t, &ClusterConfig{ + Size: 3, + EnableLeaseCheckpoint: true, + LeaseCheckpointInterval: leaseInterval, + }) defer clus.Terminate(t) // create lease From 5af3723e287f73615362052148a25511f0a6208f Mon Sep 17 00:00:00 2001 From: Jingyi Hu Date: Wed, 5 Jun 2019 15:45:58 -0700 Subject: [PATCH 2/2] CHANGELOG: update changelog-3.4 --- CHANGELOG-3.4.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG-3.4.md b/CHANGELOG-3.4.md index 6bf880b6ef3..23eb5aec283 100644 --- a/CHANGELOG-3.4.md +++ b/CHANGELOG-3.4.md @@ -53,6 +53,7 @@ See [code 
changes](https://github.com/etcd-io/etcd/compare/v3.3.0...v3.4.0) and - Improve [index compaction blocking](https://github.com/etcd-io/etcd/pull/9511) by using a copy on write clone to avoid holding the lock for the traversal of the entire index. - Update [JWT methods](https://github.com/etcd-io/etcd/pull/9883) to allow for use of any supported signature method/algorithm. - Add [Lease checkpointing](https://github.com/etcd-io/etcd/pull/9924) to persist remaining TTLs to the consensus log periodically so that long lived leases progress toward expiry in the presence of leader elections and server restarts. + - Enabled by experimental flag "--experimental-enable-lease-checkpoint". - Add [gRPC interceptor for debugging logs](https://github.com/etcd-io/etcd/pull/9990); enable `etcd --debug` flag to see per-request debug information. - Add [consistency check in snapshot status](https://github.com/etcd-io/etcd/pull/10109). If consistency check on snapshot file fails, `snapshot status` returns `"snapshot file integrity check failed..."` error. - Add [`Verify` function to perform corruption check on WAL contents](https://github.com/etcd-io/etcd/pull/10603).