diff --git a/cluster/cluster_configuration.go b/cluster/cluster_configuration.go
index 72423d52ed1..c58d3cc8aae 100644
--- a/cluster/cluster_configuration.go
+++ b/cluster/cluster_configuration.go
@@ -689,7 +689,11 @@ func (self *ClusterConfiguration) Recovery(b []byte) error {
 	self.clusterAdmins = data.Admins
 	self.dbUsers = data.DbUsers
 	self.servers = data.Servers
-	self.MetaStore.UpdateFromSnapshot(data.MetaStore)
+
+	// if recovering from a snapshot from version 0.7.x the metastore will be nil. Fix #868 https://github.com/influxdb/influxdb/issues/868
+	if data.MetaStore != nil {
+		self.MetaStore.UpdateFromSnapshot(data.MetaStore)
+	}
 
 	for _, server := range self.servers {
 		log.Info("Checking whether %s is the local server %s", server.RaftName, self.LocalRaftName)
@@ -724,6 +728,10 @@ func (self *ClusterConfiguration) Recovery(b []byte) error {
 	}
 	// map the shards to their spaces
 	self.databaseShardSpaces = data.DatabaseShardSpaces
+	// we need this if recovering from a snapshot from 0.7.x
+	if data.DatabaseShardSpaces == nil {
+		self.databaseShardSpaces = make(map[string][]*ShardSpace)
+	}
 	for _, spaces := range self.databaseShardSpaces {
 		for _, space := range spaces {
 			spaceShards := make([]*ShardData, 0)
diff --git a/integration/migration_test.go b/integration/migration_test.go
index 1dcae33a26e..cb7cc3e6548 100644
--- a/integration/migration_test.go
+++ b/integration/migration_test.go
@@ -26,13 +26,13 @@ are up on S3 so that we can run it later. Just trust that I've run it (this is P
 
 // var _ = Suite(&MigrationTestSuite{})
 
-// func (self *MigrationTestSuite) SetUpSuite(c *C) {
-// 	self.server = NewServer("integration/migration_test.toml", c)
+// func (self *MigrationTestSuite) setup(file string, c *C) {
+// 	self.server = NewServer(fmt.Sprintf("integration/%s", file), c)
 // }
 
-// func (self *MigrationTestSuite) TearDownSuite(c *C) {
+// func (self *MigrationTestSuite) teardown(dir string, c *C) {
 // 	self.server.Stop()
-// 	dataDir := "migration_data/data"
+// 	dataDir := fmt.Sprintf("./%s/data", "migration_data")
 // 	shardDir := filepath.Join(dataDir, migration.OLD_SHARD_DIR)
 // 	infos, err := ioutil.ReadDir(shardDir)
 // 	if err != nil {
@@ -41,13 +41,21 @@ are up on S3 so that we can run it later. Just trust that I've run it (this is P
 // 	}
 // 	for _, info := range infos {
 // 		if info.IsDir() {
-// 			os.Remove(filepath.Join(shardDir, info.Name(), migration.MIGRATED_MARKER))
+// 			err := os.Remove(filepath.Join(shardDir, info.Name(), migration.MIGRATED_MARKER))
+// 			if err != nil {
+// 				fmt.Printf("Error Clearing Migration: %v\n", err)
+// 			}
 // 		}
 // 	}
-// 	os.RemoveAll(filepath.Join(dataDir, datastore.SHARD_DATABASE_DIR))
+// 	err = os.RemoveAll(filepath.Join(dataDir, datastore.SHARD_DATABASE_DIR))
+// 	if err != nil {
+// 		fmt.Printf("Error Clearing Migration: %v\n", err)
+// 	}
 // }
 
 // func (self *MigrationTestSuite) TestMigrationOfPreviousDb(c *C) {
+// 	self.setup("migration_test.toml", c)
+// 	defer self.teardown("migration_data", c)
 // 	_, err := http.Post("http://localhost:8086/cluster/migrate_data?u=root&p=root", "application/json", nil)
 // 	c.Assert(err, IsNil)
 // 	// make sure that it won't kick it off a second time while it's already running
@@ -84,3 +92,19 @@ are up on S3 so that we can run it later. Just trust that I've run it (this is P
 // 	c.Assert(err, IsNil)
 // 	time.Sleep(time.Second * 5)
 // }
+
+// func (self *MigrationTestSuite) TestDoesntPanicOnPreviousSnapshot(c *C) {
+// 	self.setup("migration_test2.toml", c)
+// 	defer self.teardown("migration_data2", c)
+// 	_, err := http.Post("http://localhost:8086/cluster/migrate_data?u=root&p=root", "application/json", nil)
+// 	c.Assert(err, IsNil)
+
+// 	time.Sleep(time.Second * 5)
+
+// 	client := self.server.GetClient("test", c)
+// 	s, err := client.Query("select count(value) from foo")
+// 	c.Assert(err, IsNil)
+// 	c.Assert(s, HasLen, 1)
+// 	c.Assert(s[0].Points, HasLen, 1)
+// 	c.Assert(s[0].Points[0][1].(float64), Equals, float64(1))
+// }
diff --git a/integration/migration_test2.toml b/integration/migration_test2.toml
new file mode 100644
index 00000000000..a496bc94f2a
--- /dev/null
+++ b/integration/migration_test2.toml
@@ -0,0 +1,187 @@
+# Welcome to the InfluxDB configuration file.
+
+# If hostname (on the OS) doesn't return a name that can be resolved by the other
+# systems in the cluster, you'll have to set the hostname to an IP or something
+# that can be resolved here.
+# hostname = ""
+
+bind-address = "0.0.0.0"
+
+# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
+# The data includes raft name (random 8 bytes), os, arch and version
+# We don't track ip addresses of servers reporting. This is only used
+# to track the number of instances running and the versions which
+# is very helpful for us.
+# Change this option to true to disable reporting.
+reporting-disabled = true
+
+[logging]
+# logging level can be one of "debug", "info", "warn" or "error"
+level = "info"
+file = "stdout" # stdout to log to standard out
+
+# Configure the admin server
+[admin]
+port = 8083 # binding is disabled if the port isn't set
+assets = "./admin"
+
+# Configure the http api
+[api]
+port = 8086 # binding is disabled if the port isn't set
+# ssl-port = 8084 # Ssl support is enabled if you set a port and cert
+# ssl-cert = /path/to/cert.pem
+
+# connections will time out after this amount of time. This ensures that clients that misbehave
+# and keep alive connections they don't use won't end up connecting a million times.
+# However, if a request is taking longer than this to complete, it could be a problem.
+read-timeout = "5s"
+
+[input_plugins]
+
+  # Configure the graphite api
+  [input_plugins.graphite]
+  enabled = false
+  # port = 2003
+  # database = ""  # store graphite data in this database
+  # udp_enabled = true # enable udp interface on the same port as the tcp interface
+
+  # Configure the udp api
+  [input_plugins.udp]
+  enabled = false
+  # port = 4444
+  # database = ""
+
+  # Configure multiple udp apis, each writing to a separate db. Just
+  # repeat the following section to enable multiple udp apis on
+  # different ports.
+  [[input_plugins.udp_servers]] # array of tables
+  enabled = false
+  # port = 5551
+  # database = "db1"
+
+# Raft configuration
+[raft]
+# The raft port should be open between all servers in a cluster.
+# However, this port shouldn't be accessible from the internet.
+
+port = 8090
+
+# Where the raft logs are stored. The user running InfluxDB will need read/write access.
+dir = "integration/migration_data2/raft"
+
+# election-timeout = "1s"
+
+[storage]
+
+dir = "integration/migration_data2/data"
+# How many requests to potentially buffer in memory. If the buffer gets filled then writes
+# will still be logged and once the local storage has caught up (or compacted) the writes
+# will be replayed from the WAL
+write-buffer-size = 10000
+
+# the engine to use for new shards, old shards will continue to use the same engine
+default-engine = "leveldb"
+
+# The default setting on this is 0, which means unlimited. Set this to something if you want to
+# limit the max number of open files. max-open-files is per shard so this * that will be max.
+max-open-shards = 0
+
+# The default setting is 100. This option tells how many points will be fetched from LevelDb before
+# they get flushed into backend.
+point-batch-size = 100
+
+# The number of points to batch in memory before writing them to leveldb. Lowering this number will
+# reduce the memory usage, but will result in slower writes.
+write-batch-size = 5000000
+
+# The server will check this often for shards that have expired that should be cleared.
+retention-sweep-period = "10m"
+
+[storage.engines.leveldb]
+
+# Maximum mmap open files, this will affect the virtual memory used by
+# the process
+max-open-files = 1000
+
+# LRU cache size, LRU is used by leveldb to store contents of the
+# uncompressed sstables. You can use `m` or `g` prefix for megabytes
+# and gigabytes, respectively.
+lru-cache-size = "200m"
+
+[storage.engines.rocksdb]
+
+# Maximum mmap open files, this will affect the virtual memory used by
+# the process
+max-open-files = 1000
+
+# LRU cache size, LRU is used by rocksdb to store contents of the
+# uncompressed sstables. You can use `m` or `g` prefix for megabytes
+# and gigabytes, respectively.
+lru-cache-size = "200m"
+
+[storage.engines.hyperleveldb]
+
+# Maximum mmap open files, this will affect the virtual memory used by
+# the process
+max-open-files = 1000
+
+# LRU cache size, LRU is used by hyperleveldb to store contents of the
+# uncompressed sstables. You can use `m` or `g` prefix for megabytes
+# and gigabytes, respectively.
+lru-cache-size = "200m"
+
+[storage.engines.lmdb]
+
+map-size = "100g"
+
+[cluster]
+# A comma separated list of servers to seed
+# this server. This is only relevant when the
+# server is joining a new cluster. Otherwise
+# the server will use the list of known servers
+# prior to shutting down. Any server can be pointed to
+# as a seed. It will find the Raft leader automatically.
+
+# Here's an example. Note that the port on the host is the same as the raft port.
+# seed-servers = ["hosta:8090","hostb:8090"]
+
+# Replication happens over a TCP connection with a Protobuf protocol.
+# This port should be reachable between all servers in a cluster.
+# However, this port shouldn't be accessible from the internet.
+
+protobuf_port = 8099
+protobuf_timeout = "2s" # the write timeout on the protobuf conn, any duration parseable by time.ParseDuration
+protobuf_heartbeat = "200ms" # the heartbeat interval between the servers. must be parseable by time.ParseDuration
+protobuf_min_backoff = "1s" # the minimum backoff after a failed heartbeat attempt
+protobuf_max_backoff = "10s" # the maximum backoff after a failed heartbeat attempt
+
+# How many write requests to potentially buffer in memory per server. If the buffer gets filled then writes
+# will still be logged and once the server has caught up (or come back online) the writes
+# will be replayed from the WAL
+write-buffer-size = 1000
+
+# the maximum number of responses to buffer from remote nodes, if the
+# expected number of responses exceed this number then querying will
+# happen sequentially and the buffer size will be limited to this
+# number
+max-response-buffer-size = 100
+
+# When queries get distributed out to shards, they go in parallel. This means that results can get buffered
+# in memory since results will come in any order, but have to be processed in the correct time order.
+# Setting this higher will give better performance, but you'll need more memory. Setting this to 1 will ensure
+# that you don't need to buffer in memory, but you won't get the best performance.
+concurrent-shard-query-limit = 10
+
+[wal]
+
+dir = "integration/migration_data2/wal"
+flush-after = 1000 # the number of writes after which wal will be flushed, 0 for flushing on every write
+bookmark-after = 1000 # the number of writes after which a bookmark will be created
+
+# the number of writes after which an index entry is created pointing
+# to the offset of the first request, default to 1k
+index-after = 1000
+
+# the number of requests per one log file, if new requests came in a
+# new log file will be created
+requests-per-logfile = 10000
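
A note on the failure mode the Recovery change guards against: a raft snapshot written by 0.7.x predates the MetaStore and DatabaseShardSpaces fields, so after deserialization they come back as a nil pointer and a nil map, and using them unguarded panics during recovery. The sketch below is not InfluxDB code; the savedConfiguration struct, its field types, and the JSON encoding are simplified assumptions used only to illustrate why the two nil checks in the patch are enough.

package main

import (
	"encoding/json"
	"fmt"
)

// Simplified stand-ins for the snapshot payload (hypothetical names, not the
// real InfluxDB types). A snapshot written before these fields existed decodes
// with them left at their zero values: a nil pointer and a nil map.
type metaStore struct {
	SeriesCount int
}

type savedConfiguration struct {
	Servers             []string
	MetaStore           *metaStore
	DatabaseShardSpaces map[string][]string
}

func recoverConfig(b []byte) error {
	data := &savedConfiguration{}
	if err := json.Unmarshal(b, data); err != nil {
		return err
	}

	// Without this guard, dereferencing data.MetaStore panics for old snapshots.
	if data.MetaStore != nil {
		fmt.Println("metastore series count:", data.MetaStore.SeriesCount)
	}

	// Ranging over a nil map is legal, but any later assignment into it would
	// panic, so swap in an empty map when the field is missing.
	if data.DatabaseShardSpaces == nil {
		data.DatabaseShardSpaces = make(map[string][]string)
	}
	for db, spaces := range data.DatabaseShardSpaces {
		fmt.Println(db, spaces)
	}
	return nil
}

func main() {
	// A snapshot from the old format: neither new field is present.
	oldSnapshot := []byte(`{"Servers":["server-1"]}`)
	if err := recoverConfig(oldSnapshot); err != nil {
		fmt.Println("recovery failed:", err)
	}
}

Substituting an empty map for a nil one keeps the code that later ranges over and populates databaseShardSpaces on a single path for both old and new snapshots.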