Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.2: kvserver,clusterversion: {version,feature}-gate lease upgrade logic #88624

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,4 +295,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 22.1-70 set the active cluster version in the format '<major>.<minor>'
version version 22.1-72 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,6 @@
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/#/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>22.1-70</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>22.1-72</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
14 changes: 14 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,16 @@ const (
// version is enabled, the receiver will look at the priority of snapshots
// using the fields added in 22.2.
PrioritizeSnapshots
// EnableLeaseUpgrade version gates a change in the lease transfer protocol
// whereby we only ever transfer expiration-based leases (and have
// recipients later upgrade them to the more efficient epoch based ones).
// This was done to limit the effects of ill-advised lease transfers since
// the incoming leaseholder would need to recognize itself as such within a
// few seconds. This needs version gating so that in mixed-version clusters,
// as part of lease transfers, we don't start sending out expiration based
// leases to nodes that (i) don't expect them for certain keyspans, and (ii)
// don't know to upgrade them to efficient epoch-based ones.
EnableLeaseUpgrade

// *************************************************
// Step (1): Add new versions here.
Expand Down Expand Up @@ -464,6 +474,10 @@ var rawVersionsSingleton = keyedVersions{
Key: PrioritizeSnapshots,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 70},
},
{
Key: EnableLeaseUpgrade,
Version: roachpb.Version{Major: 22, Minor: 1, Internal: 72},
},

// *************************************************
// Step (2): Add new versions here.
Expand Down
5 changes: 3 additions & 2 deletions pkg/clusterversion/key_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 72 additions & 1 deletion pkg/kv/kvserver/client_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/config/zonepb"
"github.com/cockroachdb/cockroach/pkg/gossip"
"github.com/cockroachdb/cockroach/pkg/keys"
Expand Down Expand Up @@ -1364,7 +1365,6 @@ func TestLeaseTransfersUseExpirationLeasesAndBumpToEpochBasedOnes(t *testing.T)
if !li.Current().OwnedBy(n2.GetFirstStoreID()) {
return errors.New("lease still owned by n1")
}
require.Equal(t, roachpb.LeaseExpiration, li.Current().Type())
return nil
})

Expand All @@ -1376,3 +1376,74 @@ func TestLeaseTransfersUseExpirationLeasesAndBumpToEpochBasedOnes(t *testing.T)
defer mu.Unlock()
require.Equal(t, roachpb.LeaseExpiration, mu.lease.Type())
}

// TestLeaseUpgradeVersionGate tests the version gating for the lease-upgrade
// process.
//
// TODO(irfansharif): Delete this in 23.1 (or whenever we get rid of the
// clusterversion.EnableLeaseUpgrade).
func TestLeaseUpgradeVersionGate(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
st := cluster.MakeTestingClusterSettingsWithVersions(
clusterversion.TestingBinaryVersion,
clusterversion.ByKey(clusterversion.EnableLeaseUpgrade-1),
false, /* initializeVersion */
)
tci := serverutils.StartNewTestCluster(t, 2, base.TestClusterArgs{
ReplicationMode: base.ReplicationManual,
ServerArgs: base.TestServerArgs{
Settings: st,
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DisableAutomaticVersionUpgrade: make(chan struct{}),
BinaryVersionOverride: clusterversion.ByKey(clusterversion.EnableLeaseUpgrade - 1),
},
},
},
})
tc := tci.(*testcluster.TestCluster)
defer tc.Stopper().Stop(ctx)

scratchKey := tc.ScratchRange(t)
n1, n1Target := tc.Server(0), tc.Target(0)
n2, n2Target := tc.Server(1), tc.Target(1)

// Add a replica; we're going to move the lease to and from it below.
desc := tc.AddVotersOrFatal(t, scratchKey, n2Target)

// Transfer the lease from n1 to n2. It should be transferred as an
// epoch-based one since we've not upgraded past
// clusterversion.EnableLeaseUpgrade yet.
tc.TransferRangeLeaseOrFatal(t, desc, n2Target)
testutils.SucceedsSoon(t, func() error {
li, _, err := tc.FindRangeLeaseEx(ctx, desc, nil)
require.NoError(t, err)
if !li.Current().OwnedBy(n2.GetFirstStoreID()) {
return errors.New("lease still owned by n1")
}
require.Equal(t, roachpb.LeaseEpoch, li.Current().Type())
return nil
})

// Enable the version gate.
_, err := tc.Conns[0].ExecContext(ctx, `SET CLUSTER SETTING version = $1`,
clusterversion.ByKey(clusterversion.EnableLeaseUpgrade).String())
require.NoError(t, err)

// Transfer the lease back from n2 to n1. It should be transferred as an
// expiration-based lease that's later upgraded to an epoch based one now
// that we're past the version gate.
tc.TransferRangeLeaseOrFatal(t, desc, n1Target)
testutils.SucceedsSoon(t, func() error {
li, _, err := tc.FindRangeLeaseEx(ctx, desc, nil)
require.NoError(t, err)
if !li.Current().OwnedBy(n1.GetFirstStoreID()) {
return errors.New("lease still owned by n2")
}
return nil
})
tc.WaitForLeaseUpgrade(ctx, t, desc)
}
14 changes: 13 additions & 1 deletion pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/constraint"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/retry"
Expand All @@ -66,6 +68,13 @@ import (
"go.etcd.io/etcd/raft/v3"
)

var transferExpirationLeasesFirstEnabled = settings.RegisterBoolSetting(
settings.SystemOnly,
"kv.transfer_expiration_leases_first.enabled",
"controls whether we transfer expiration-based leases that are later upgraded to epoch-based ones",
true,
)

var leaseStatusLogLimiter = func() *log.EveryN {
e := log.Every(15 * time.Second)
e.ShouldLog() // waste the first shot
Expand Down Expand Up @@ -247,7 +256,10 @@ func (p *pendingLeaseRequest) InitOrJoinRequest(
ProposedTS: &status.Now,
}

if p.repl.requiresExpiringLeaseRLocked() || transfer {
if p.repl.requiresExpiringLeaseRLocked() ||
(transfer &&
transferExpirationLeasesFirstEnabled.Get(&p.repl.store.ClusterSettings().SV) &&
p.repl.store.ClusterSettings().Version.IsActive(ctx, clusterversion.EnableLeaseUpgrade)) {
// In addition to ranges that unconditionally require expiration-based
// leases (node liveness and earlier), we also use them during lease
// transfers for all other ranges. After acquiring these expiration
Expand Down