Skip to content

Commit

Permalink
Merge branch 'main' into hot_shard_tool
Browse files Browse the repository at this point in the history
  • Loading branch information
AnnigeriShambu authored Aug 10, 2022
2 parents 41b58ff + 85e34ff commit 75cefe9
Show file tree
Hide file tree
Showing 45 changed files with 936 additions and 390 deletions.
2 changes: 1 addition & 1 deletion api/v1beta1/foundationdbcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ type AutomaticReplacementOptions struct {

// FailureDetectionTimeSeconds controls how long a process must be
// failed or missing before it is automatically replaced.
// The default is 1800 seconds, or 30 minutes.
// The default is 7200 seconds, or 2 hours.
FailureDetectionTimeSeconds *int `json:"failureDetectionTimeSeconds,omitempty"`

// MaxConcurrentReplacements controls how many automatic replacements are allowed to take place.
Expand Down
4 changes: 3 additions & 1 deletion api/v1beta2/foundationdb_database_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type DatabaseConfiguration struct {

// StorageEngine defines the storage engine the database uses.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=ssd;ssd-1;ssd-2;memory;memory-1;memory-2;ssd-redwood-1-experimental;ssd-rocksdb-experimental;ssd-rocksdb-v1;memory-radixtree-beta;custom
// +kubebuilder:validation:Enum=ssd;ssd-1;ssd-2;memory;memory-1;memory-2;ssd-redwood-1-experimental;ssd-rocksdb-experimental;ssd-rocksdb-v1;ssd-sharded-rocksdb;memory-radixtree-beta;custom
// +kubebuilder:default:=ssd-2
StorageEngine StorageEngine `json:"storage_engine,omitempty"`

Expand Down Expand Up @@ -737,6 +737,8 @@ const (
StorageEngineRocksDbExperimental StorageEngine = "ssd-rocksdb-experimental"
// StorageEngineRocksDbV1 defines the storage engine ssd-rocksdb-v1.
StorageEngineRocksDbV1 StorageEngine = "ssd-rocksdb-v1"
// StorageEngineShardedRocksDB defines the storage engine ssd-sharded-rocksdb.
StorageEngineShardedRocksDB StorageEngine = "ssd-sharded-rocksdb"
)

// RoleCounts represents the roles whose counts can be customized.
Expand Down
4 changes: 4 additions & 0 deletions api/v1beta2/foundationdb_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ type FoundationDBStatusClusterInfo struct {
// FaultTolerance provides information about the fault tolerance status
// of the cluster.
FaultTolerance FaultTolerance `json:"fault_tolerance,omitempty"`

// IncompatibleConnections provides information about processes that try to connect to the cluster with an
// incompatible version.
IncompatibleConnections []string `json:"incompatible_connections,omitempty"`
}

// FaultTolerance provides information about the fault tolerance status
Expand Down
2 changes: 2 additions & 0 deletions api/v1beta2/foundationdb_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ var _ = Describe("FoundationDBStatus", func() {
DatabaseStatus: FoundationDBStatusClientDBStatus{Available: true, Healthy: true},
},
Cluster: FoundationDBStatusClusterInfo{
IncompatibleConnections: []string{},
FaultTolerance: FaultTolerance{
MaxZoneFailuresWithoutLosingAvailability: 1,
MaxZoneFailuresWithoutLosingData: 1,
Expand Down Expand Up @@ -459,6 +460,7 @@ var _ = Describe("FoundationDBStatus", func() {

When("parsing the status json with a 7.1.0-rc1 cluster", func() {
status := FoundationDBStatusClusterInfo{
IncompatibleConnections: []string{},
DatabaseConfiguration: DatabaseConfiguration{
RedundancyMode: "double",
StorageEngine: StorageEngineSSD2,
Expand Down
16 changes: 10 additions & 6 deletions api/v1beta2/foundationdb_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ func (version Version) IsStorageEngineSupported(storageEngine StorageEngine) boo
return version.IsAtLeast(Versions.SupportsRocksDBV1)
} else if storageEngine == StorageEngineRocksDbExperimental {
return !version.IsAtLeast(Versions.SupportsRocksDBV1)
} else if storageEngine == StorageEngineShardedRocksDB {
return version.IsAtLeast(Versions.SupportsShardedRocksDB)
}
return true
}
Expand All @@ -208,12 +210,14 @@ var Versions = struct {
MinimumVersion,
SupportsRocksDBV1,
SupportsIsPresent,
SupportsShardedRocksDB,
Default Version
}{
Default: Version{Major: 6, Minor: 2, Patch: 20},
NextPatchVersion: Version{Major: 6, Minor: 2, Patch: 21},
NextMajorVersion: Version{Major: 7, Minor: 0, Patch: 0},
MinimumVersion: Version{Major: 6, Minor: 2, Patch: 20},
SupportsRocksDBV1: Version{Major: 7, Minor: 1, Patch: 0, ReleaseCandidate: 4},
SupportsIsPresent: Version{Major: 7, Minor: 1, Patch: 4},
Default: Version{Major: 6, Minor: 2, Patch: 20},
NextPatchVersion: Version{Major: 6, Minor: 2, Patch: 21},
NextMajorVersion: Version{Major: 7, Minor: 0, Patch: 0},
MinimumVersion: Version{Major: 6, Minor: 2, Patch: 20},
SupportsRocksDBV1: Version{Major: 7, Minor: 1, Patch: 0, ReleaseCandidate: 4},
SupportsIsPresent: Version{Major: 7, Minor: 1, Patch: 4},
SupportsShardedRocksDB: Version{Major: 7, Minor: 2, Patch: 0},
}
6 changes: 3 additions & 3 deletions api/v1beta2/foundationdbcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ type AutomaticReplacementOptions struct {

// FailureDetectionTimeSeconds controls how long a process must be
// failed or missing before it is automatically replaced.
// The default is 1800 seconds, or 30 minutes.
// The default is 7200 seconds, or 2 hours.
FailureDetectionTimeSeconds *int `json:"failureDetectionTimeSeconds,omitempty"`

// MaxConcurrentReplacements controls how many automatic replacements are allowed to take place.
Expand Down Expand Up @@ -1946,9 +1946,9 @@ func (cluster *FoundationDBCluster) GetEnableAutomaticReplacements() bool {
return pointer.BoolDeref(cluster.Spec.AutomationOptions.Replacements.Enabled, true)
}

// GetFailureDetectionTimeSeconds returns cluster.Spec.AutomationOptions.Replacements.FailureDetectionTimeSeconds or if unset the default 1800
// GetFailureDetectionTimeSeconds returns cluster.Spec.AutomationOptions.Replacements.FailureDetectionTimeSeconds or if unset the default 7200
func (cluster *FoundationDBCluster) GetFailureDetectionTimeSeconds() int {
return pointer.IntDeref(cluster.Spec.AutomationOptions.Replacements.FailureDetectionTimeSeconds, 1800)
return pointer.IntDeref(cluster.Spec.AutomationOptions.Replacements.FailureDetectionTimeSeconds, 7200)
}

// GetSidecarContainerEnableLivenessProbe returns cluster.Spec.SidecarContainer.EnableLivenessProbe or if unset the default true
Expand Down
22 changes: 22 additions & 0 deletions api/v1beta2/foundationdbcluster_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4626,6 +4626,28 @@ var _ = Describe("[api] FoundationDBCluster", func() {
},
fmt.Errorf("storage engine ssd-rocksdb-v1 is not supported on version 6.1.3, stateless is not a valid process class for coordinators"),
),
Entry("using invalid version for sharded rocksdb",
&FoundationDBCluster{
Spec: FoundationDBClusterSpec{
Version: "7.1.4",
DatabaseConfiguration: DatabaseConfiguration{
StorageEngine: StorageEngineShardedRocksDB,
},
},
},
fmt.Errorf("storage engine ssd-sharded-rocksdb is not supported on version 7.1.4"),
),
Entry("using valid version for sharded rocksdb",
&FoundationDBCluster{
Spec: FoundationDBClusterSpec{
Version: "7.2.0",
DatabaseConfiguration: DatabaseConfiguration{
StorageEngine: StorageEngineShardedRocksDB,
},
},
},
nil,
),
)
})

Expand Down
5 changes: 5 additions & 0 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9501,6 +9501,7 @@ spec:
- ssd-redwood-1-experimental
- ssd-rocksdb-experimental
- ssd-rocksdb-v1
- ssd-sharded-rocksdb
- memory-radixtree-beta
- custom
maxLength: 100
Expand Down Expand Up @@ -12919,6 +12920,7 @@ spec:
- ssd-redwood-1-experimental
- ssd-rocksdb-experimental
- ssd-rocksdb-v1
- ssd-sharded-rocksdb
- memory-radixtree-beta
- custom
maxLength: 100
Expand Down
3 changes: 3 additions & 0 deletions config/tests/unified_image/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@
- tagSuffix: "-local"
- op: remove
path: "/spec/processes/general/podTemplate/spec/initContainers/0"
- op: add
path: "/spec/processes/general/podTemplate/spec/serviceAccount"
value: "fdb-kubernetes"
1 change: 1 addition & 0 deletions config/tests/unified_image/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- "../base"
- "unified_image_role.yaml"
patchesJson6902:
- path: images.yaml
target:
Expand Down
31 changes: 31 additions & 0 deletions config/tests/unified_image/unified_image_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# ServiceAccount named fdb-kubernetes. The unified_image test patch (images.yaml)
# sets the general process pod template's serviceAccount to this name.
apiVersion: v1
kind: ServiceAccount
metadata:
name: fdb-kubernetes
---
# Role allowing get, watch, update and patch on pods in the core ("") API group.
# NOTE(review): presumably needed so the unified image can read and modify its
# own pod — confirm against the image's requirements.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: fdb-kubernetes
rules:
- apiGroups:
- ""
resources:
- "pods"
verbs:
- "get"
- "watch"
- "update"
- "patch"
---
# RoleBinding that grants the fdb-kubernetes Role to the fdb-kubernetes ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: fdb-kubernetes
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: fdb-kubernetes
subjects:
- kind: ServiceAccount
name: fdb-kubernetes
10 changes: 10 additions & 0 deletions controllers/admin_client_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,16 @@ func (client *mockAdminClient) KillProcesses(addresses []fdbv1beta2.ProcessAddre
}
adminClientMutex.Unlock()

if client.Cluster.Status.RunningVersion != client.Cluster.Spec.Version {
// We have to do this in the mock client. In the real world, tryConnectionOptions in update_status
// will update the version.
client.Cluster.Status.RunningVersion = client.Cluster.Spec.Version
err := client.KubeClient.Status().Update(context.TODO(), client.Cluster)
if err != nil {
return err
}
}

client.UnfreezeStatus()
return nil
}
Expand Down
24 changes: 9 additions & 15 deletions controllers/bounce_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func (bounceProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReco

if useLocks && upgrading {
var req *requeue
addresses, req = getAddressesForUpgrade(r, adminClient, lockClient, cluster, version)
addresses, req = getAddressesForUpgrade(r, status, lockClient, cluster, version)
if req != nil {
return req
}
Expand All @@ -187,13 +187,13 @@ func (bounceProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReco
if err != nil {
return &requeue{curError: err}
}
}

if upgrading {
cluster.Status.RunningVersion = cluster.Spec.Version
err = r.Status().Update(ctx, cluster)
if err != nil {
return &requeue{curError: err}
// If the cluster was upgraded, we will requeue and let the update_status command set the correct version.
// Updating the version in this method has the drawback that we upgrade the version independently of the success
// of the kill command. The kill command is not reliable, which means that some kill requests might not be
// delivered and the return value will still not contain any error.
if upgrading {
return &requeue{message: "fetch latest status after upgrade"}
}
}

Expand All @@ -202,28 +202,22 @@ func (bounceProcesses) reconcile(ctx context.Context, r *FoundationDBClusterReco

// getAddressesForUpgrade checks that all processes in a cluster are ready to be
// upgraded and returns the full list of addresses.
func getAddressesForUpgrade(r *FoundationDBClusterReconciler, adminClient fdbadminclient.AdminClient, lockClient fdbadminclient.LockClient, cluster *fdbv1beta2.FoundationDBCluster, version fdbv1beta2.Version) ([]fdbv1beta2.ProcessAddress, *requeue) {
func getAddressesForUpgrade(r *FoundationDBClusterReconciler, databaseStatus *fdbv1beta2.FoundationDBStatus, lockClient fdbadminclient.LockClient, cluster *fdbv1beta2.FoundationDBCluster, version fdbv1beta2.Version) ([]fdbv1beta2.ProcessAddress, *requeue) {
logger := log.WithValues("namespace", cluster.Namespace, "cluster", cluster.Name, "reconciler", "bounceProcesses")
pendingUpgrades, err := lockClient.GetPendingUpgrades(version)
if err != nil {
return nil, &requeue{curError: err}
}

databaseStatus, err := adminClient.GetStatus()
if err != nil {
return nil, &requeue{curError: err}
}

if !databaseStatus.Client.DatabaseStatus.Available {
logger.Info("Deferring upgrade until database is available")
r.Recorder.Event(cluster, corev1.EventTypeNormal, "UpgradeRequeued", "Database is unavailable")
return nil, &requeue{message: "Deferring upgrade until database is available"}
}

notReadyProcesses := make([]string, 0)
addresses := make([]fdbv1beta2.ProcessAddress, 0, len(databaseStatus.Cluster.Processes))
for _, process := range databaseStatus.Cluster.Processes {
processID := process.Locality["instance_id"]
processID := process.Locality[fdbv1beta2.FDBLocalityInstanceIDKey]
if process.Version == version.String() {
continue
}
Expand Down
12 changes: 6 additions & 6 deletions controllers/bounce_processes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ var _ = Describe("bounceProcesses", func() {
}
})

It("should not requeue", func() {
Expect(requeue).To(BeNil())
It("should requeue", func() {
Expect(requeue).NotTo(BeNil())
})

It("should kill all the processes", func() {
Expand Down Expand Up @@ -233,8 +233,8 @@ var _ = Describe("bounceProcesses", func() {
Expect(err).NotTo(HaveOccurred())
})

It("should not requeue", func() {
Expect(requeue).To(BeNil())
It("should requeue", func() {
Expect(requeue).NotTo(BeNil())
})

It("should kill all the processes", func() {
Expand Down Expand Up @@ -272,8 +272,8 @@ var _ = Describe("bounceProcesses", func() {
cluster.Spec.LockOptions.DisableLocks = &disabled
})

It("should not requeue", func() {
Expect(requeue).To(BeNil())
It("should requeue", func() {
Expect(requeue).NotTo(BeNil())
})

It("should kill all the processes", func() {
Expand Down
23 changes: 12 additions & 11 deletions controllers/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ import (
"sort"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/FoundationDB/fdb-kubernetes-operator/pkg/fdbadminclient"
"github.com/FoundationDB/fdb-kubernetes-operator/pkg/podmanager"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"sigs.k8s.io/controller-runtime/pkg/controller"

Expand All @@ -53,15 +52,16 @@ import (
// FoundationDBClusterReconciler reconciles a FoundationDBCluster object
type FoundationDBClusterReconciler struct {
client.Client
Recorder record.EventRecorder
Log logr.Logger
InSimulation bool
PodLifecycleManager podmanager.PodLifecycleManager
PodClientProvider func(*fdbv1beta2.FoundationDBCluster, *corev1.Pod) (podclient.FdbPodClient, error)
DatabaseClientProvider DatabaseClientProvider
DeprecationOptions internal.DeprecationOptions
GetTimeout time.Duration
PostTimeout time.Duration
Recorder record.EventRecorder
Log logr.Logger
InSimulation bool
EnableRestartIncompatibleProcesses bool
PodLifecycleManager podmanager.PodLifecycleManager
PodClientProvider func(*fdbv1beta2.FoundationDBCluster, *corev1.Pod) (podclient.FdbPodClient, error)
DatabaseClientProvider DatabaseClientProvider
DeprecationOptions internal.DeprecationOptions
GetTimeout time.Duration
PostTimeout time.Duration
}

// NewFoundationDBClusterReconciler creates a new FoundationDBClusterReconciler with defaults.
Expand Down Expand Up @@ -139,6 +139,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
addPVCs{},
addPods{},
generateInitialClusterFile{},
removeIncompatibleProcesses{},
updateSidecarVersions{},
updatePodConfig{},
updateLabels{},
Expand Down
Loading

0 comments on commit 75cefe9

Please sign in to comment.