diff --git a/CHANGELOG-3.2.md b/CHANGELOG-3.2.md index 111e9bed669..9cccc367efb 100644 --- a/CHANGELOG-3.2.md +++ b/CHANGELOG-3.2.md @@ -38,6 +38,14 @@ See [code changes](https://github.com/coreos/etcd/compare/v3.2.16...v3.2.17) and - Server now returns `rpctypes.ErrLeaseTTLTooLarge` to client, when the requested `TTL` is larger than *9,000,000,000 seconds* (which is >285 years). - Again, etcd `Lease` is meant for short-periodic keepalives or sessions, in the range of seconds or minutes. Not for hours or days! - Enable etcd server [`raft.Config.CheckQuorum` when starting with `ForceNewCluster`](https://github.com/coreos/etcd/pull/9347). +- Add [`etcd --unsafe-overwrite-db`](https://github.com/coreos/etcd/pull/9484) flag, to support [migration from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480). + - etcd server panics if it tries to restore from existing snapshots but finds no v3 `ETCD_DATA_DIR/member/snap/db` file. + - This happens when the server was migrated from v2 with no previous v3 data. + - The panic is there to prevent accidental v3 data loss (e.g. the `db` file might have been moved). + - With `--unsafe-overwrite-db` enabled, etcd is allowed to create a fresh `db` file on reboot when there are existing snapshots. + - To continue to use etcd without v3 data, keep `--unsafe-overwrite-db` enabled. Alternatively, write some test v3 keys (e.g. `ETCDCTL_API=3 etcdctl put foo bar`), and then restart etcd with `--unsafe-overwrite-db` disabled. + - Use this flag only for v2 migration. Otherwise, previous v3 data could be overwritten with existing snapshots. + - v4 will deprecate this flag. 
### Go diff --git a/CHANGELOG-3.3.md b/CHANGELOG-3.3.md index 5468ddbb38e..b7f83923814 100644 --- a/CHANGELOG-3.3.md +++ b/CHANGELOG-3.3.md @@ -19,6 +19,17 @@ See [code changes](https://github.com/coreos/etcd/compare/v3.3.2...v3.3.3) and [ - For instance, when hourly writes are 100 and `--auto-compaction-mode=periodic --auto-compaction-retention=24h`, `v3.2.x`, `v3.3.0`, `v3.3.1`, and `v3.3.2` compact revision 2400, 2640, and 2880 for every 2.4-hour, while `v3.3.3` *or later* compacts revision 2400, 2500, 2600 for every 1-hour. - Futhermore, when `--auto-compaction-mode=periodic --auto-compaction-retention=30m` and writes per minute are about 1000, `v3.3.0`, `v3.3.1`, and `v3.3.2` compact revision 30000, 33000, and 36000, for every 3-minute, while `v3.3.3` *or later* compacts revision 30000, 60000, and 90000, for every 30-minute. +### Fixed: v3 + +- Add [`etcd --unsafe-overwrite-db`](https://github.com/coreos/etcd/pull/9484) flag, to support [migration from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480). + - etcd server panics if it tries to restore from existing snapshots but finds no v3 `ETCD_DATA_DIR/member/snap/db` file. + - This happens when the server was migrated from v2 with no previous v3 data. + - The panic is there to prevent accidental v3 data loss (e.g. the `db` file might have been moved). + - With `--unsafe-overwrite-db` enabled, etcd is allowed to create a fresh `db` file on reboot when there are existing snapshots. + - To continue to use etcd without v3 data, keep `--unsafe-overwrite-db` enabled. Alternatively, write some test v3 keys (e.g. `ETCDCTL_API=3 etcdctl put foo bar`), and then restart etcd with `--unsafe-overwrite-db` disabled. + - Use this flag only for v2 migration. Otherwise, previous v3 data could be overwritten with existing snapshots. + - v4 will deprecate this flag. + ### Metrics, Monitoring - Add missing [`etcd_network_peer_sent_failures_total` count](https://github.com/coreos/etcd/pull/9437). 
diff --git a/CHANGELOG-3.4.md b/CHANGELOG-3.4.md index 40e53b73467..cf0ef28987d 100644 --- a/CHANGELOG-3.4.md +++ b/CHANGELOG-3.4.md @@ -93,6 +93,14 @@ See [security doc](https://github.com/coreos/etcd/blob/master/Documentation/op-g - For instance, a flaky(or rejoining) member may drop in and out, and start campaign. This member will end up with a higher term, and ignore all incoming messages with lower term. In this case, a new leader eventually need to get elected, thus disruptive to cluster availability. Raft implements Pre-Vote phase to prevent this kind of disruptions. If enabled, Raft runs an additional phase of election to check if pre-candidate can get enough votes to win an election. - `--pre-vote=false` by default. - v3.5 will enable `--pre-vote=true` by default. +- Add [`--unsafe-overwrite-db`](https://github.com/coreos/etcd/pull/9484) flag, to support [migration from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480). + - etcd server panics if it tries to restore from existing snapshots but finds no v3 `ETCD_DATA_DIR/member/snap/db` file. + - This happens when the server was migrated from v2 with no previous v3 data. + - The panic is there to prevent accidental v3 data loss (e.g. the `db` file might have been moved). + - With `--unsafe-overwrite-db` enabled, etcd is allowed to create a fresh `db` file on reboot when there are existing snapshots. + - To continue to use etcd without v3 data, keep `--unsafe-overwrite-db` enabled. Alternatively, write some test v3 keys (e.g. `ETCDCTL_API=3 etcdctl put foo bar`), and then restart etcd with `--unsafe-overwrite-db` disabled. + - Use this flag only for v2 migration. Otherwise, previous v3 data could be overwritten with existing snapshots. + - v4 will deprecate this flag. - TODO: [`--initial-corrupt-check`](TODO) flag is now stable (`--experimental-initial-corrupt-check` is deprecated). - `--initial-corrupt-check=true` by default, to check cluster database hashes before serving client/peer traffic. 
- TODO: [`--corrupt-check-time`](TODO) flag is now stable (`--experimental-corrupt-check-time` is deprecated). diff --git a/Documentation/upgrades/upgrade_3_2.md b/Documentation/upgrades/upgrade_3_2.md index 3f36d0fccac..956b5cee507 100644 --- a/Documentation/upgrades/upgrade_3_2.md +++ b/Documentation/upgrades/upgrade_3_2.md @@ -212,6 +212,8 @@ See [issue #6336](https://github.com/coreos/etcd/issues/6336) for more contexts. ### Server upgrade checklists +**NOTE:** `v3.2.18` added [`etcd --unsafe-overwrite-db`](https://github.com/coreos/etcd/pull/9484) flag, to support [migration from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480). etcd server panics if it tries to restore from existing snapshots but finds no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server was migrated from v2 with no previous v3 data. The panic is there to prevent accidental v3 data loss (e.g. the `db` file might have been moved). With `--unsafe-overwrite-db` enabled, etcd is allowed to create a fresh `db` file on reboot when there are existing snapshots. To continue to use etcd without v3 data, keep `--unsafe-overwrite-db` enabled. Alternatively, write some test v3 keys (e.g. `ETCDCTL_API=3 etcdctl put foo bar`), and then restart etcd with `--unsafe-overwrite-db` disabled. Use this flag only for v2 migration. Otherwise, previous v3 data could be overwritten with existing snapshots. v4 will deprecate this flag. + #### Upgrade requirements To upgrade an existing etcd deployment to 3.2, the running cluster must be 3.1 or greater. If it's before 3.1, please [upgrade to 3.1](upgrade_3_1.md) before upgrading to 3.2. diff --git a/embed/config.go b/embed/config.go index c61729a6439..55691b4953f 100644 --- a/embed/config.go +++ b/embed/config.go @@ -205,9 +205,6 @@ type Config struct { ListenMetricsUrls []url.URL ListenMetricsUrlsJSON string `json:"listen-metrics-urls"` - // ForceNewCluster starts a new cluster even if previously started; unsafe. 
- ForceNewCluster bool `json:"force-new-cluster"` - // UserHandlers is for registering users handlers and only used for // embedding etcd into other applications. // The map key is the route path for the handler, and @@ -227,6 +224,25 @@ type Config struct { ExperimentalInitialCorruptCheck bool `json:"experimental-initial-corrupt-check"` ExperimentalCorruptCheckTime time.Duration `json:"experimental-corrupt-check-time"` ExperimentalEnableV2V3 string `json:"experimental-enable-v2v3"` + + // ForceNewCluster starts a new cluster even if previously started; unsafe. + ForceNewCluster bool `json:"force-new-cluster"` + + // UnsafeOverwriteDB is set to allow unsafe "db" file overwrites + // from existing snapshots. + // + // "db" file overwrite can happen: + // 1. upgrade from v2 to v3.2, with snapshot but no v3 data + // 2. previous "db" file deletion + // + // In both cases, "db" gets created anew, thus previous data, if any, + // gets overwritten. In case 2, server will panic to prevent accidental + // data loss (e.g. "db" file may have been moved). + // + // Enable this only for v2 to v3.2 migration. + // + // TODO: remove this once v2 migration support is dropped. 
+ UnsafeOverwriteDB bool `json:"unsafe-overwrite-db"` } // configYAML holds the config suitable for yaml parsing @@ -286,6 +302,7 @@ func NewConfig() *Config { HostWhitelist: defaultHostWhitelist, AuthToken: "simple", PreVote: false, // TODO: enable by default in v3.5 + UnsafeOverwriteDB: false, } cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name) return cfg diff --git a/embed/etcd.go b/embed/etcd.go index 029d5fb6e3f..38fa5cfc49d 100644 --- a/embed/etcd.go +++ b/embed/etcd.go @@ -157,7 +157,6 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { DiscoveryURL: cfg.Durl, DiscoveryProxy: cfg.Dproxy, NewCluster: cfg.IsNewCluster(), - ForceNewCluster: cfg.ForceNewCluster, PeerTLSInfo: cfg.PeerTLSInfo, TickMs: cfg.TickMs, ElectionTicks: cfg.ElectionTicks(), @@ -173,6 +172,8 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { CorruptCheckTime: cfg.ExperimentalCorruptCheckTime, PreVote: cfg.PreVote, Debug: cfg.Debug, + ForceNewCluster: cfg.ForceNewCluster, + UnsafeOverwriteDB: cfg.UnsafeOverwriteDB, } srvcfg.HostWhitelist = make(map[string]struct{}, len(cfg.HostWhitelist)) diff --git a/etcdmain/config.go b/etcdmain/config.go index 7abd105b798..5ec969e2c49 100644 --- a/etcdmain/config.go +++ b/etcdmain/config.go @@ -193,9 +193,6 @@ func newConfig() *config { fs.StringVar(&cfg.ec.LogPkgLevels, "log-package-levels", "", "Specify a particular log level for each etcd package (eg: 'etcdmain=CRITICAL,etcdserver=DEBUG').") fs.StringVar(&cfg.ec.LogOutput, "log-output", embed.DefaultLogOutput, "Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd.") - // unsafe - fs.BoolVar(&cfg.ec.ForceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster.") - // version fs.BoolVar(&cfg.printVersion, "version", false, "Print the version and exit.") @@ -216,6 +213,10 @@ func newConfig() *config { fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, 
"Duration of time between cluster corruption check passes.") fs.StringVar(&cfg.ec.ExperimentalEnableV2V3, "experimental-enable-v2v3", cfg.ec.ExperimentalEnableV2V3, "v3 prefix for serving emulated v2 state.") + // unsafe + fs.BoolVar(&cfg.ec.ForceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster.") + fs.BoolVar(&cfg.ec.UnsafeOverwriteDB, "unsafe-overwrite-db", false, "Allow unsafe 'db' file overwrites (only use for v2 migration to v3).") + // ignored for _, f := range cfg.ignored { fs.Var(&flags.IgnoredFlag{Name: f}, f, "") diff --git a/etcdmain/help.go b/etcdmain/help.go index 582d36bff72..5d3f81e4626 100644 --- a/etcdmain/help.go +++ b/etcdmain/help.go @@ -166,14 +166,6 @@ logging flags --log-output 'default' specify 'stdout' or 'stderr' to skip journald logging even when running under systemd. -unsafe flags: - -Please be CAUTIOUS when using unsafe flags because it will break the guarantees -given by the consensus protocol. - - --force-new-cluster 'false' - force to create a new one-member cluster. - profiling flags: --enable-pprof 'false' Enable runtime profiling data via HTTP server. Address is at client URL + "/debug/pprof/" @@ -193,5 +185,15 @@ experimental flags: duration of time between cluster corruption check passes. --experimental-enable-v2v3 '' serve v2 requests through the v3 backend under a given prefix. + + +Please be CAUTIOUS when using unsafe flags because it will break the guarantees +given by the consensus protocol. + +unsafe flags: + --force-new-cluster 'false' + force to create a new one-member cluster. + --unsafe-overwrite-db 'false' + allow unsafe 'db' file overwrites (only use for v2 migration to v3). 
` ) diff --git a/etcdserver/backend.go b/etcdserver/backend.go index 97e780980d4..06e4a77537b 100644 --- a/etcdserver/backend.go +++ b/etcdserver/backend.go @@ -69,11 +69,12 @@ func openBackend(cfg ServerConfig) backend.Backend { // before updating the backend db after persisting raft snapshot to disk, // violating the invariant snapshot.Metadata.Index < db.consistentIndex. In this // case, replace the db with the snapshot db sent by the leader. +// If cfg.UnsafeOverwriteDB is set and the consistent index read from oldbe is 0, oldbe is returned unchanged. func recoverSnapshotBackend(cfg ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) { var cIndex consistentIndex kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex) defer kv.Close() - if snapshot.Metadata.Index <= kv.ConsistentIndex() { + if (cfg.UnsafeOverwriteDB && kv.ConsistentIndex() == 0) || snapshot.Metadata.Index <= kv.ConsistentIndex() { return oldbe, nil } oldbe.Close() diff --git a/etcdserver/config.go b/etcdserver/config.go index 7a362aa133d..32a21d500da 100644 --- a/etcdserver/config.go +++ b/etcdserver/config.go @@ -44,7 +44,6 @@ type ServerConfig struct { InitialPeerURLsMap types.URLsMap InitialClusterToken string NewCluster bool - ForceNewCluster bool PeerTLSInfo transport.TLSInfo // HostWhitelist lists acceptable hostnames from client requests. @@ -80,6 +79,12 @@ type ServerConfig struct { PreVote bool Debug bool + + ForceNewCluster bool + + // UnsafeOverwriteDB is set to allow unsafe "db" file overwrites + // from existing snapshots. 
+ UnsafeOverwriteDB bool } // VerifyBootstrap sanity-checks the initial config for bootstrap case diff --git a/etcdserver/server.go b/etcdserver/server.go index 5a27467260e..66b87d1b396 100644 --- a/etcdserver/server.go +++ b/etcdserver/server.go @@ -385,15 +385,41 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { if err != nil && err != raftsnap.ErrNoSnapshot { return nil, err } + unsafeOverwrite := false if snapshot != nil { + if !beExist { + // snapshot exists but had no "db" file before! + // "db" file was created anew or overwritten + plog.Warningf("started with snapshot but %q had not existed", bepath) + + // 1. had upgraded from v2 to v3.2, with existing v2 snapshot + // - but no previous v3 data + // - nothing to recover (consistent index = 0) + // - use the newly created "db" file for future v3 API use + // - only allow when explicitly configured + if cfg.UnsafeOverwriteDB { + unsafeOverwrite = true + plog.Warningf("%q has been created anew from existing snapshots (e.g. migrated v2 server)", bepath) + plog.Warning("'--unsafe-overwrite-db' is set, so starting from fresh db file") + } else { + // 2. accidental "db" file deletion with existing v3 snapshot + // - original "db" file has been overwritten by a new "db" file! + // - panic! 
+ plog.Warningf("%q had been accidentally deleted (now overwritten)", bepath) + plog.Panic("consider '--unsafe-overwrite-db' flag to override and start with fresh data") + } + } + if err = st.Recovery(snapshot.Data); err != nil { plog.Panicf("recovered store from snapshot error: %v", err) } plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index) + if be, err = recoverSnapshotBackend(cfg, be, *snapshot); err != nil { plog.Panicf("recovering backend from snapshot error: %v", err) } } + cfg.Print() if !cfg.ForceNewCluster { id, cl, n, s, w = restartNode(cfg, snapshot) @@ -403,7 +429,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) { cl.SetStore(st) cl.SetBackend(be) cl.Recover(api.UpdateCapability) - if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist { + if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist && !unsafeOverwrite { os.RemoveAll(bepath) return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath) } diff --git a/integration/cluster.go b/integration/cluster.go index d8f36b87f48..9d5cc44f92e 100644 --- a/integration/cluster.go +++ b/integration/cluster.go @@ -100,6 +100,7 @@ type ClusterConfig struct { DiscoveryURL string UseGRPC bool QuotaBackendBytes int64 + SnapshotCount uint64 MaxTxnOps uint MaxRequestBytes uint GRPCKeepAliveMinTime time.Duration @@ -241,6 +242,7 @@ func (c *cluster) mustNewMember(t *testing.T) *member { peerTLS: c.cfg.PeerTLS, clientTLS: c.cfg.ClientTLS, quotaBackendBytes: c.cfg.QuotaBackendBytes, + snapshotCount: c.cfg.SnapshotCount, maxTxnOps: c.cfg.MaxTxnOps, maxRequestBytes: c.cfg.MaxRequestBytes, grpcKeepAliveMinTime: c.cfg.GRPCKeepAliveMinTime, 
@@ -520,6 +522,7 @@ type memberConfig struct { peerTLS *transport.TLSInfo clientTLS *transport.TLSInfo quotaBackendBytes int64 + snapshotCount uint64 maxTxnOps uint maxRequestBytes uint grpcKeepAliveMinTime time.Duration @@ -574,6 +577,7 @@ func mustNewMember(t *testing.T, mcfg memberConfig) *member { m.ElectionTicks = electionTicks m.TickMs = uint(tickDuration / time.Millisecond) m.QuotaBackendBytes = mcfg.quotaBackendBytes + m.SnapCount = mcfg.snapshotCount m.MaxTxnOps = mcfg.maxTxnOps if m.MaxTxnOps == 0 { m.MaxTxnOps = embed.DefaultMaxTxnOps diff --git a/integration/v3_cluster_test.go b/integration/v3_cluster_test.go new file mode 100644 index 00000000000..fe0e604a9c2 --- /dev/null +++ b/integration/v3_cluster_test.go @@ -0,0 +1,46 @@ +// Copyright 2018 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package integration + +import ( + "os" + "path/filepath" + "testing" + + "github.com/coreos/etcd/pkg/testutil" +) + +// TestV3UnsafeOverwriteDB restarts a member whose "db" file was removed, +// with UnsafeOverwriteDB set, and waits for a leader to be elected. +func TestV3UnsafeOverwriteDB(t *testing.T) { + defer testutil.AfterTest(t) + + clus := NewClusterV3(t, &ClusterConfig{ + Size: 1, + SnapshotCount: 1, + }) + defer clus.Terminate(t) + + bepath := filepath.Join(clus.Members[0].SnapDir(), "db") + clus.Members[0].Stop(t) + + if err := os.RemoveAll(bepath); err != nil { + t.Fatal(err) + } + clus.Members[0].UnsafeOverwriteDB = true + + clus.Members[0].Restart(t) + clus.WaitLeader(t) +}