From 104786803d3b82ff10f6217a670bebb616f8f8e3 Mon Sep 17 00:00:00 2001 From: Vivek Menezes Date: Thu, 4 Apr 2019 15:26:20 -0400 Subject: [PATCH] sql: ensure lease expiration is monotonically increasing This is a fix for both a correctness issue and a performance problem. 1. Correctness issue during schema changes: A node could hand an expiration time from a lease to a transaction T, and if the node reacquired a lease for the table at an expiration less than the existing lease's expiration it could release the existing lease (with a greater expiration) before the transaction T committed, thereby invalidating T's deadline. 2. Performance issue: After a lease acquisition if the expiration time of the new lease was less than the existing lease, it would replace the table in the cache with the new lease and release the older lease. But unfortunately it would also not update the table in the table name cache (the table name cache always uses the lease with the highest expiration). The table in the table name cache would have it's lease released rendering the name entry invalid. Therefore until the node acquired another lease for the same table, the node would be in a state where there was a table cached but it's name -> table cache entry invalid, resulting in it making name resolution requests for every query (but not lease renewals). It is likely this bug started happening with the addition of more aggressive lease acquisitions through #28725. I've reduced the value of DefaultTableDescriptorLeaseJitterFraction to make the logic ensuring the monotonicity of the lease expiration fire infrequently. Release note: None --- pkg/base/config.go | 2 +- pkg/sql/lease.go | 63 ++++++++++++++++++++++++------- pkg/sql/lease_internal_test.go | 68 ++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 15 deletions(-) diff --git a/pkg/base/config.go b/pkg/base/config.go index b937d2b867bc..807ae5c5434f 100644 --- a/pkg/base/config.go +++ b/pkg/base/config.go @@ -87,7 +87,7 @@ const ( // DefaultTableDescriptorLeaseJitterFraction is the default factor // that we use to randomly jitter the lease duration when acquiring a // new lease and the lease renewal timeout. - DefaultTableDescriptorLeaseJitterFraction = 0.25 + DefaultTableDescriptorLeaseJitterFraction = 0.05 // DefaultTableDescriptorLeaseRenewalTimeout is the default time // before a lease expires when acquisition to renew the lease begins. diff --git a/pkg/sql/lease.go b/pkg/sql/lease.go index 452006916aa1..d88b2af4ddce 100644 --- a/pkg/sql/lease.go +++ b/pkg/sql/lease.go @@ -106,6 +106,14 @@ func (s *tableVersionState) hasExpired(timestamp hlc.Timestamp) bool { return !timestamp.Less(s.expiration) } +// hasValidExpiration checks that this table have a larger expiration than +// the existing one it is replacing. This can be used to check the +// monotonicity of the expiration times on a table at a particular version. +// The version is not explicitly checked here. +func (s *tableVersionState) hasValidExpiration(existing *tableVersionState) bool { + return existing.expiration.Less(s.expiration) +} + func (s *tableVersionState) incRefcount() { s.mu.Lock() s.incRefcountLocked() @@ -163,11 +171,20 @@ func (s LeaseStore) jitteredLeaseDuration() time.Duration { // acquire a lease on the most recent version of a table descriptor. // If the lease cannot be obtained because the descriptor is in the process of // being dropped, the error will be errTableDropped. -func (s LeaseStore) acquire(ctx context.Context, tableID sqlbase.ID) (*tableVersionState, error) { +// The expiration time set for the lease > minExpiration. +func (s LeaseStore) acquire( + ctx context.Context, minExpiration hlc.Timestamp, tableID sqlbase.ID, +) (*tableVersionState, error) { var table *tableVersionState err := s.execCfg.DB.Txn(ctx, func(ctx context.Context, txn *client.Txn) error { expiration := txn.OrigTimestamp() expiration.WallTime += int64(s.jitteredLeaseDuration()) + if !minExpiration.Less(expiration) { + // In the rare circumstances where expiration <= minExpiration + // use an expiration based on the minExpiration to guarantee + // a monotonically increasing expiration. + expiration = minExpiration.Add(int64(time.Millisecond), 0) + } tableDesc, err := sqlbase.GetTableDescFromID(ctx, txn, tableID) if err != nil { @@ -857,19 +874,33 @@ func (m *LeaseManager) AcquireFreshestFromStore(ctx context.Context, tableID sql // upsertLocked inserts a lease for a particular table version. // If an existing lease exists for the table version it replaces // it and returns it. -func (t *tableState) upsertLocked(ctx context.Context, table *tableVersionState) *storedTableLease { +func (t *tableState) upsertLocked( + ctx context.Context, table *tableVersionState, +) (*storedTableLease, error) { s := t.mu.active.find(table.Version) if s == nil { if t.mu.active.findNewest() != nil { log.Infof(ctx, "new lease: %s", table) } t.mu.active.insert(table) - return nil + return nil, nil + } + + // The table is replacing an existing one at the same version. + if !table.hasValidExpiration(s) { + // This is a violation of an invariant and can actually not + // happen. We return an error here to aid in further investigations. + return nil, errors.Errorf("lease expiration monotonicity violation, (%s) vs (%s)", s, table) } s.mu.Lock() table.mu.Lock() - // subsume the refcount of the older lease. + // subsume the refcount of the older lease. This is permitted because + // the new lease has a greater expiration than the older lease and + // any transaction using the older lease can safely use a deadline set + // to the older lease's expiration even though the older lease is + // released! This is because the new lease is valid at the same table + // version at a greater expiration. table.mu.refcount += s.mu.refcount s.mu.refcount = 0 l := s.mu.lease @@ -881,7 +912,7 @@ func (t *tableState) upsertLocked(ctx context.Context, table *tableVersionState) s.mu.Unlock() t.mu.active.remove(s) t.mu.active.insert(table) - return l + return l, nil } // removeInactiveVersions removes inactive versions in t.mu.active.data with refcount 0. @@ -908,26 +939,30 @@ func (t *tableState) removeInactiveVersions() []*storedTableLease { // If the lease cannot be obtained because the descriptor is in the process of // being dropped, the error will be errTableDropped. -// minExpirationTime, if not set to the zero value, will be used as a lower -// bound on the expiration of the new table. This can be used to eliminate the -// jitter in the expiration time, and guarantee that we get a lease that will be -// inserted at the end of the lease set (i.e. it will be returned by -// findNewest() from now on). The boolean returned is true if this call was actually -// responsible for the lease acquisition. +// The boolean returned is true if this call was actually responsible for the +// lease acquisition. func acquireNodeLease(ctx context.Context, m *LeaseManager, id sqlbase.ID) (bool, error) { var toRelease *storedTableLease resultChan, didAcquire := m.group.DoChan(fmt.Sprintf("acquire%d", id), func() (interface{}, error) { if m.isDraining() { return nil, errors.New("cannot acquire lease when draining") } - table, err := m.LeaseStore.acquire(ctx, id) + newest := m.findNewest(id) + var minExpiration hlc.Timestamp + if newest != nil { + minExpiration = newest.expiration + } + table, err := m.LeaseStore.acquire(ctx, minExpiration, id) if err != nil { return nil, err } t := m.findTableState(id, false /* create */) t.mu.Lock() defer t.mu.Unlock() - toRelease = t.upsertLocked(ctx, table) + toRelease, err = t.upsertLocked(ctx, table) + if err != nil { + return nil, err + } m.tableNames.insert(table) return leaseToken(table), nil }) @@ -1239,7 +1274,7 @@ func (c *tableNameCache) insert(table *tableVersionState) { // If we already have a lease in the cache for this name, see if this one is // better (higher version or later expiration). if table.Version > existing.Version || - (table.Version == existing.Version && (existing.expiration.Less(table.expiration))) { + (table.Version == existing.Version && table.hasValidExpiration(existing)) { // Overwrite the old table. The new one is better. From now on, we want // clients to use the new one. c.tables[key] = table diff --git a/pkg/sql/lease_internal_test.go b/pkg/sql/lease_internal_test.go index fcd8529e107a..88d7b2496313 100644 --- a/pkg/sql/lease_internal_test.go +++ b/pkg/sql/lease_internal_test.go @@ -350,6 +350,74 @@ CREATE TABLE t.%s (k CHAR PRIMARY KEY, v CHAR); } } +// Tests that a name cache entry always exists for the latest lease and +// the lease expiration time is monotonically increasing. +func TestNameCacheContainsLatestLease(t *testing.T) { + defer leaktest.AfterTest(t)() + removalTracker := NewLeaseRemovalTracker() + testingKnobs := base.TestingKnobs{ + SQLLeaseManager: &LeaseManagerTestingKnobs{ + LeaseStoreTestingKnobs: LeaseStoreTestingKnobs{ + LeaseReleasedEvent: removalTracker.LeaseRemovedNotification, + }, + }, + } + s, db, kvDB := serverutils.StartServer(t, base.TestServerArgs{Knobs: testingKnobs}) + defer s.Stopper().Stop(context.TODO()) + leaseManager := s.LeaseManager().(*LeaseManager) + + const tableName = "test" + + if _, err := db.Exec(fmt.Sprintf(` +CREATE DATABASE t; +CREATE TABLE t.%s (k CHAR PRIMARY KEY, v CHAR); +`, tableName)); err != nil { + t.Fatal(err) + } + + tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", tableName) + + // Populate the name cache. + if _, err := db.Exec("SELECT * FROM t.test;"); err != nil { + t.Fatal(err) + } + + // There is a cache entry. + lease := leaseManager.tableNames.get(tableDesc.ParentID, tableName, s.Clock().Now()) + if lease == nil { + t.Fatalf("name cache has no unexpired entry for (%d, %s)", tableDesc.ParentID, tableName) + } + + tracker := removalTracker.TrackRemoval(&lease.ImmutableTableDescriptor) + + // Acquire another lease. + if _, err := acquireNodeLease(context.TODO(), leaseManager, tableDesc.ID); err != nil { + t.Fatal(err) + } + + // Check the name resolves to the new lease. + newLease := leaseManager.tableNames.get(tableDesc.ParentID, tableName, s.Clock().Now()) + if newLease == nil { + t.Fatalf("name cache doesn't contain entry for (%d, %s)", tableDesc.ParentID, tableName) + } + if newLease == lease { + t.Fatalf("same lease %s", newLease.expiration.GoTime()) + } + + if err := leaseManager.Release(&lease.ImmutableTableDescriptor); err != nil { + t.Fatal(err) + } + + // The first lease acquisition was released. + if err := tracker.WaitForRemoval(); err != nil { + t.Fatal(err) + } + + if err := leaseManager.Release(&newLease.ImmutableTableDescriptor); err != nil { + t.Fatal(err) + } +} + // Test that table names are treated as case sensitive by the name cache. func TestTableNameCaseSensitive(t *testing.T) { defer leaktest.AfterTest(t)()