From b7addc354953340db17c01774671f9a9569e89b2 Mon Sep 17 00:00:00 2001 From: Vytenis Darulis Date: Thu, 3 Sep 2020 14:10:23 -0400 Subject: [PATCH] Fix races triggered by TestRoundtrip in flushManager and bootstrapManager (#2586) Fix one of the few races that are always triggering on my env with go 1.14 Updates #2540 --- src/dbnode/storage/bootstrap.go | 15 ++++++---- src/dbnode/storage/database.go | 13 ++++---- .../storage/database_bootstrapped_test.go | 30 ++++++++++--------- src/dbnode/storage/flush.go | 16 ++++++---- src/dbnode/storage/flush_test.go | 3 +- src/dbnode/storage/storage_mock.go | 20 ++++++------- src/dbnode/storage/types.go | 10 +++---- 7 files changed, 60 insertions(+), 47 deletions(-) diff --git a/src/dbnode/storage/bootstrap.go b/src/dbnode/storage/bootstrap.go index 846791067f..2c569aee3a 100644 --- a/src/dbnode/storage/bootstrap.go +++ b/src/dbnode/storage/bootstrap.go @@ -31,6 +31,7 @@ import ( "github.com/m3db/m3/src/x/context" xerrors "github.com/m3db/m3/src/x/errors" "github.com/m3db/m3/src/x/instrument" + xtime "github.com/m3db/m3/src/x/time" "github.com/uber-go/tally" "go.uber.org/zap" @@ -85,7 +86,7 @@ type bootstrapManager struct { status tally.Gauge bootstrapDuration tally.Timer durableStatus tally.Gauge - lastBootstrapCompletionTime time.Time + lastBootstrapCompletionTime xtime.UnixNano } func newBootstrapManager( @@ -117,8 +118,11 @@ func (m *bootstrapManager) IsBootstrapped() bool { return state == Bootstrapped } -func (m *bootstrapManager) LastBootstrapCompletionTime() (time.Time, bool) { - return m.lastBootstrapCompletionTime, !m.lastBootstrapCompletionTime.IsZero() +func (m *bootstrapManager) LastBootstrapCompletionTime() (xtime.UnixNano, bool) { + m.RLock() + bsTime := m.lastBootstrapCompletionTime + m.RUnlock() + return bsTime, bsTime > 0 } func (m *bootstrapManager) Bootstrap() (BootstrapResult, error) { @@ -191,8 +195,9 @@ func (m *bootstrapManager) Bootstrap() (BootstrapResult, error) { // load to the cluster. It turns out to be better to let ticking happen naturally // on its own course so that the load of ticking and flushing is more spread out // across the cluster. - - m.lastBootstrapCompletionTime = m.nowFn() + m.Lock() + m.lastBootstrapCompletionTime = xtime.ToUnixNano(m.nowFn()) + m.Unlock() return result, nil } diff --git a/src/dbnode/storage/database.go b/src/dbnode/storage/database.go index fbef19f04f..ab0e31a103 100644 --- a/src/dbnode/storage/database.go +++ b/src/dbnode/storage/database.go @@ -974,10 +974,10 @@ func (d *db) IsBootstrappedAndDurable() bool { return false } - lastBootstrapCompletionTime, ok := d.mediator.LastBootstrapCompletionTime() + lastBootstrapCompletionTimeNano, ok := d.mediator.LastBootstrapCompletionTime() if !ok { d.log.Debug("not bootstrapped and durable because: no last bootstrap completion time", - zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTime)) + zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime())) return false } @@ -985,14 +985,15 @@ func (d *db) IsBootstrappedAndDurable() bool { lastSnapshotStartTime, ok := d.mediator.LastSuccessfulSnapshotStartTime() if !ok { d.log.Debug("not bootstrapped and durable because: no last snapshot start time", - zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTime), - zap.Time("lastSnapshotStartTime", lastSnapshotStartTime), + zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime()), + zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()), ) return false } var ( - hasSnapshottedPostBootstrap = lastSnapshotStartTime.After(lastBootstrapCompletionTime) + lastBootstrapCompletionTime = lastBootstrapCompletionTimeNano.ToTime() + hasSnapshottedPostBootstrap = lastSnapshotStartTime.After(lastBootstrapCompletionTimeNano) hasBootstrappedSinceReceivingNewShards = lastBootstrapCompletionTime.After(d.lastReceivedNewShards) || lastBootstrapCompletionTime.Equal(d.lastReceivedNewShards) isBootstrappedAndDurable = hasSnapshottedPostBootstrap && @@ -1003,7 +1004,7 @@ func (d *db) IsBootstrappedAndDurable() bool { d.log.Debug( "not bootstrapped and durable because: has not snapshotted post bootstrap and/or has not bootstrapped since receiving new shards", zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTime), - zap.Time("lastSnapshotStartTime", lastSnapshotStartTime), + zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()), zap.Time("lastReceivedNewShards", d.lastReceivedNewShards), ) return false diff --git a/src/dbnode/storage/database_bootstrapped_test.go b/src/dbnode/storage/database_bootstrapped_test.go index d7e2a3f9fb..51d1157c9e 100644 --- a/src/dbnode/storage/database_bootstrapped_test.go +++ b/src/dbnode/storage/database_bootstrapped_test.go @@ -25,6 +25,7 @@ import ( "time" "github.com/golang/mock/gomock" + xtime "github.com/m3db/m3/src/x/time" "github.com/stretchr/testify/assert" ) @@ -34,16 +35,17 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { var ( validIsBootstrapped = true - validShardSetAssignedAt = time.Now() - validLastBootstrapCompletionTime = validShardSetAssignedAt.Add(time.Second) - validLastSuccessfulSnapshotStartTime = validLastBootstrapCompletionTime.Add(time.Second) + validShardSetAssignedAt = xtime.ToUnixNano(time.Now()) + validLastBootstrapCompletionTime = xtime.ToUnixNano(validShardSetAssignedAt.ToTime().Add(time.Second)) + validLastSuccessfulSnapshotStartTime = xtime.ToUnixNano(validLastBootstrapCompletionTime.ToTime().Add(time.Second)) + zeroTime xtime.UnixNano ) testCases := []struct { title string isBootstrapped bool - lastBootstrapCompletionTime time.Time - lastSuccessfulSnapshotStartTime time.Time - shardSetAssignedAt time.Time + lastBootstrapCompletionTime xtime.UnixNano + lastSuccessfulSnapshotStartTime xtime.UnixNano + shardSetAssignedAt xtime.UnixNano expectedResult bool }{ { @@ -57,7 +59,7 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { { title: "False if no last bootstrap completion time", isBootstrapped: validIsBootstrapped, - lastBootstrapCompletionTime: time.Time{}, + lastBootstrapCompletionTime: zeroTime, lastSuccessfulSnapshotStartTime: validLastSuccessfulSnapshotStartTime, shardSetAssignedAt: validShardSetAssignedAt, expectedResult: false, @@ -66,7 +68,7 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { title: "False if no last successful snapshot start time", isBootstrapped: validIsBootstrapped, lastBootstrapCompletionTime: validLastBootstrapCompletionTime, - lastSuccessfulSnapshotStartTime: time.Time{}, + lastSuccessfulSnapshotStartTime: zeroTime, shardSetAssignedAt: validShardSetAssignedAt, expectedResult: false, }, @@ -91,7 +93,7 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { isBootstrapped: validIsBootstrapped, lastBootstrapCompletionTime: validLastBootstrapCompletionTime, lastSuccessfulSnapshotStartTime: validLastSuccessfulSnapshotStartTime, - shardSetAssignedAt: validLastBootstrapCompletionTime.Add(time.Second), + shardSetAssignedAt: validLastBootstrapCompletionTime + xtime.UnixNano(xtime.Second), expectedResult: false, }, { @@ -113,7 +115,7 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { mediator := NewMockdatabaseMediator(ctrl) d.mediator = mediator - d.lastReceivedNewShards = tc.shardSetAssignedAt + d.lastReceivedNewShards = tc.shardSetAssignedAt.ToTime() mediator.EXPECT().IsBootstrapped().Return(tc.isBootstrapped) if !tc.isBootstrapped { @@ -122,8 +124,8 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { return } - if tc.lastBootstrapCompletionTime.IsZero() { - mediator.EXPECT().LastBootstrapCompletionTime().Return(time.Time{}, false) + if tc.lastBootstrapCompletionTime == 0 { + mediator.EXPECT().LastBootstrapCompletionTime().Return(zeroTime, false) assert.Equal(t, tc.expectedResult, d.IsBootstrappedAndDurable()) // Early return because other mock calls will not get called. return @@ -131,8 +133,8 @@ func TestDatabaseIsBootstrappedAndDurable(t *testing.T) { mediator.EXPECT().LastBootstrapCompletionTime().Return(tc.lastBootstrapCompletionTime, true) - if tc.lastSuccessfulSnapshotStartTime.IsZero() { - mediator.EXPECT().LastSuccessfulSnapshotStartTime().Return(time.Time{}, false) + if tc.lastSuccessfulSnapshotStartTime == 0 { + mediator.EXPECT().LastSuccessfulSnapshotStartTime().Return(zeroTime, false) assert.Equal(t, tc.expectedResult, d.IsBootstrappedAndDurable()) // Early return because other mock calls will not get called. return diff --git a/src/dbnode/storage/flush.go b/src/dbnode/storage/flush.go index 73ee135117..a941868f1c 100644 --- a/src/dbnode/storage/flush.go +++ b/src/dbnode/storage/flush.go @@ -31,9 +31,11 @@ import ( "github.com/m3db/m3/src/dbnode/persist/fs/commitlog" "github.com/m3db/m3/src/dbnode/retention" xerrors "github.com/m3db/m3/src/x/errors" + xtime "github.com/m3db/m3/src/x/time" "github.com/pborman/uuid" "github.com/uber-go/tally" + "go.uber.org/atomic" "go.uber.org/zap" ) @@ -90,9 +92,10 @@ type flushManager struct { state flushManagerState metrics flushManagerMetrics - lastSuccessfulSnapshotStartTime time.Time - logger *zap.Logger - nowFn clock.NowFn + lastSuccessfulSnapshotStartTime atomic.Int64 // == xtime.UnixNano + + logger *zap.Logger + nowFn clock.NowFn } func newFlushManager( @@ -247,7 +250,7 @@ func (m *flushManager) dataSnapshot( finalErr := multiErr.FinalError() if finalErr == nil { - m.lastSuccessfulSnapshotStartTime = startTime + m.lastSuccessfulSnapshotStartTime.Store(int64(xtime.ToUnixNano(startTime))) } m.metrics.dataSnapshotDuration.Record(m.nowFn().Sub(start)) return finalErr @@ -378,6 +381,7 @@ func (m *flushManager) flushNamespaceWithTimes( return multiErr.FinalError() } -func (m *flushManager) LastSuccessfulSnapshotStartTime() (time.Time, bool) { - return m.lastSuccessfulSnapshotStartTime, !m.lastSuccessfulSnapshotStartTime.IsZero() +func (m *flushManager) LastSuccessfulSnapshotStartTime() (xtime.UnixNano, bool) { + snapTime := xtime.UnixNano(m.lastSuccessfulSnapshotStartTime.Load()) + return snapTime, snapTime > 0 } diff --git a/src/dbnode/storage/flush_test.go b/src/dbnode/storage/flush_test.go index 1e271a7b3f..73fa37ca2e 100644 --- a/src/dbnode/storage/flush_test.go +++ b/src/dbnode/storage/flush_test.go @@ -34,6 +34,7 @@ import ( "github.com/m3db/m3/src/dbnode/retention" "github.com/m3db/m3/src/x/ident" xtest "github.com/m3db/m3/src/x/test" + xtime "github.com/m3db/m3/src/x/time" "github.com/golang/mock/gomock" "github.com/stretchr/testify/require" @@ -554,7 +555,7 @@ func TestFlushManagerFlushSnapshot(t *testing.T) { lastSuccessfulSnapshot, ok := fm.LastSuccessfulSnapshotStartTime() require.True(t, ok) - require.Equal(t, now, lastSuccessfulSnapshot) + require.Equal(t, xtime.ToUnixNano(now), lastSuccessfulSnapshot) } type timesInOrder []time.Time diff --git a/src/dbnode/storage/storage_mock.go b/src/dbnode/storage/storage_mock.go index 7e3098a116..275260d5d3 100644 --- a/src/dbnode/storage/storage_mock.go +++ b/src/dbnode/storage/storage_mock.go @@ -2414,10 +2414,10 @@ func (mr *MockdatabaseBootstrapManagerMockRecorder) IsBootstrapped() *gomock.Cal } // LastBootstrapCompletionTime mocks base method -func (m *MockdatabaseBootstrapManager) LastBootstrapCompletionTime() (time.Time, bool) { +func (m *MockdatabaseBootstrapManager) LastBootstrapCompletionTime() (time0.UnixNano, bool) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "LastBootstrapCompletionTime") - ret0, _ := ret[0].(time.Time) + ret0, _ := ret[0].(time0.UnixNano) ret1, _ := ret[1].(bool) return ret0, ret1 } @@ -2493,10 +2493,10 @@ func (mr *MockdatabaseFlushManagerMockRecorder) Flush(startTime interface{}) *go } // LastSuccessfulSnapshotStartTime mocks base method -func (m *MockdatabaseFlushManager) LastSuccessfulSnapshotStartTime() (time.Time, bool) { +func (m *MockdatabaseFlushManager) LastSuccessfulSnapshotStartTime() (time0.UnixNano, bool) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "LastSuccessfulSnapshotStartTime") - ret0, _ := ret[0].(time.Time) + ret0, _ := ret[0].(time0.UnixNano) ret1, _ := ret[1].(bool) return ret0, ret1 } @@ -2688,10 +2688,10 @@ func (mr *MockdatabaseFileSystemManagerMockRecorder) Report() *gomock.Call { } // LastSuccessfulSnapshotStartTime mocks base method -func (m *MockdatabaseFileSystemManager) LastSuccessfulSnapshotStartTime() (time.Time, bool) { +func (m *MockdatabaseFileSystemManager) LastSuccessfulSnapshotStartTime() (time0.UnixNano, bool) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "LastSuccessfulSnapshotStartTime") - ret0, _ := ret[0].(time.Time) + ret0, _ := ret[0].(time0.UnixNano) ret1, _ := ret[1].(bool) return ret0, ret1 } @@ -3035,10 +3035,10 @@ func (mr *MockdatabaseMediatorMockRecorder) IsBootstrapped() *gomock.Call { } // LastBootstrapCompletionTime mocks base method -func (m *MockdatabaseMediator) LastBootstrapCompletionTime() (time.Time, bool) { +func (m *MockdatabaseMediator) LastBootstrapCompletionTime() (time0.UnixNano, bool) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "LastBootstrapCompletionTime") - ret0, _ := ret[0].(time.Time) + ret0, _ := ret[0].(time0.UnixNano) ret1, _ := ret[1].(bool) return ret0, ret1 } @@ -3143,10 +3143,10 @@ func (mr *MockdatabaseMediatorMockRecorder) Report() *gomock.Call { } // LastSuccessfulSnapshotStartTime mocks base method -func (m *MockdatabaseMediator) LastSuccessfulSnapshotStartTime() (time.Time, bool) { +func (m *MockdatabaseMediator) LastSuccessfulSnapshotStartTime() (time0.UnixNano, bool) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "LastSuccessfulSnapshotStartTime") - ret0, _ := ret[0].(time.Time) + ret0, _ := ret[0].(time0.UnixNano) ret1, _ := ret[1].(bool) return ret0, ret1 } diff --git a/src/dbnode/storage/types.go b/src/dbnode/storage/types.go index 4b1f310dd2..e68d355ff8 100644 --- a/src/dbnode/storage/types.go +++ b/src/dbnode/storage/types.go @@ -727,7 +727,7 @@ type databaseBootstrapManager interface { // LastBootstrapCompletionTime returns the last bootstrap completion time, // if any. - LastBootstrapCompletionTime() (time.Time, bool) + LastBootstrapCompletionTime() (xtime.UnixNano, bool) // Bootstrap performs bootstrapping for all namespaces and shards owned. Bootstrap() (BootstrapResult, error) @@ -749,7 +749,7 @@ type databaseFlushManager interface { // LastSuccessfulSnapshotStartTime returns the start time of the last // successful snapshot, if any. - LastSuccessfulSnapshotStartTime() (time.Time, bool) + LastSuccessfulSnapshotStartTime() (xtime.UnixNano, bool) // Report reports runtime information. Report() @@ -797,7 +797,7 @@ type databaseFileSystemManager interface { // LastSuccessfulSnapshotStartTime returns the start time of the last // successful snapshot, if any. - LastSuccessfulSnapshotStartTime() (time.Time, bool) + LastSuccessfulSnapshotStartTime() (xtime.UnixNano, bool) } // databaseColdFlushManager manages the database related cold flush activities. @@ -867,7 +867,7 @@ type databaseMediator interface { // LastBootstrapCompletionTime returns the last bootstrap completion time, // if any. - LastBootstrapCompletionTime() (time.Time, bool) + LastBootstrapCompletionTime() (xtime.UnixNano, bool) // Bootstrap bootstraps the database with file operations performed at the end. Bootstrap() (BootstrapResult, error) @@ -892,7 +892,7 @@ type databaseMediator interface { // LastSuccessfulSnapshotStartTime returns the start time of the last // successful snapshot, if any. - LastSuccessfulSnapshotStartTime() (time.Time, bool) + LastSuccessfulSnapshotStartTime() (xtime.UnixNano, bool) } // OnColdFlush can perform work each time a series is flushed.