Reduce API calls to Kubernetes to fetch process group ID information #1295

Merged 2 commits on Jul 22, 2022 (changes shown below are from 1 commit)
11 changes: 11 additions & 0 deletions internal/pod_models.go
@@ -37,6 +37,17 @@ import (

var processClassSanitizationPattern = regexp.MustCompile("[^a-z0-9-]")

// GetProcessGroupIDFromPodName returns the process group ID for a given Pod name.
func GetProcessGroupIDFromPodName(cluster *fdbv1beta2.FoundationDBCluster, podName string) string {
tmpName := strings.ReplaceAll(podName, cluster.Name, "")[1:]

if cluster.Spec.ProcessGroupIDPrefix != "" {
return fmt.Sprintf("%s-%s", cluster.Spec.ProcessGroupIDPrefix, tmpName)
}

return tmpName
}

// GetProcessGroupID generates an ID for a process group.
//
// This will return the pod name and the process group ID.
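To make the new helper's behavior concrete, here is a minimal, self-contained sketch of the same logic. The `minimalCluster` type is a stand-in introduced only for this example; the real helper reads `cluster.Name` and `cluster.Spec.ProcessGroupIDPrefix` from a `fdbv1beta2.FoundationDBCluster`.

```go
package main

import (
	"fmt"
	"strings"
)

// minimalCluster stands in for fdbv1beta2.FoundationDBCluster; only the two
// fields the helper reads are reproduced here.
type minimalCluster struct {
	Name                 string // metadata.name
	ProcessGroupIDPrefix string // spec.processGroupIDPrefix
}

// processGroupIDFromPodName mirrors GetProcessGroupIDFromPodName above:
// strip the cluster name from the Pod name, drop the joining dash, and
// re-apply the configured prefix if one is set.
func processGroupIDFromPodName(cluster minimalCluster, podName string) string {
	tmpName := strings.ReplaceAll(podName, cluster.Name, "")[1:]

	if cluster.ProcessGroupIDPrefix != "" {
		return fmt.Sprintf("%s-%s", cluster.ProcessGroupIDPrefix, tmpName)
	}

	return tmpName
}

func main() {
	fmt.Println(processGroupIDFromPodName(minimalCluster{Name: "test"}, "test-storage-1"))
	// storage-1
	fmt.Println(processGroupIDFromPodName(minimalCluster{Name: "test", ProcessGroupIDPrefix: "prefix"}, "test-storage-1"))
	// prefix-storage-1
}
```

Note that `strings.ReplaceAll` removes every occurrence of the cluster name and the `[1:]` slice drops the dash that joined it to the rest of the Pod name; both entries of the table-driven test below follow from this.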
17 changes: 17 additions & 0 deletions internal/pod_models_test.go
@@ -3181,4 +3181,21 @@ var _ = Describe("pod_models", func() {
Expect(GetPodDNSName(cluster, "operator-test-storage-1")).To(Equal("operator-test-storage-1.operator-test-1.my-ns.svc.cluster.example"))
})
})

DescribeTable("getting the process group ID from the Pod name", func(cluster *fdbv1beta2.FoundationDBCluster, podName string, expected string) {
Expect(GetProcessGroupIDFromPodName(cluster, podName)).To(Equal(expected))
},
Entry("cluster without prefix", &fdbv1beta2.FoundationDBCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
}, "test-storage-1", "storage-1"),
Entry("cluster with prefix", &fdbv1beta2.FoundationDBCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
Spec: fdbv1beta2.FoundationDBClusterSpec{
ProcessGroupIDPrefix: "prefix",
},
}, "test-storage-1", "prefix-storage-1"))
})
2 changes: 1 addition & 1 deletion kubectl-fdb/cmd/analyze.go
@@ -399,7 +399,7 @@ func analyzeCluster(cmd *cobra.Command, kubeClient client.Client, clusterName st
confirmed := false

if len(failedProcessGroups) > 0 {
err := replaceProcessGroups(kubeClient, cluster.Name, failedProcessGroups, namespace, true, false, wait, false)
err := replaceProcessGroups(kubeClient, cluster.Name, failedProcessGroups, namespace, true, wait, false, true)
if err != nil {
return err
}
14 changes: 7 additions & 7 deletions kubectl-fdb/cmd/buggify_crash_loop.go
@@ -66,12 +66,7 @@ func newBuggifyCrashLoop(streams genericclioptions.IOStreams) *cobra.Command {
return err
}

processGroups, err := getProcessGroupIDsFromPod(kubeClient, cluster, args, namespace)
if err != nil {
return err
}

return updateCrashLoopList(kubeClient, cluster, processGroups, namespace, wait, clear, clean)
return updateCrashLoopList(kubeClient, cluster, args, namespace, wait, clear, clean)
},
Example: `
# Add process groups into crash loop state for a cluster in the current namespace
@@ -104,7 +99,7 @@ kubectl fdb -n default buggify crash-loop -c cluster pod-1 pod-2
}

// updateCrashLoopList updates the crash-loop list of the cluster
func updateCrashLoopList(kubeClient client.Client, clusterName string, processGroups []string, namespace string, wait bool, clear bool, clean bool) error {
func updateCrashLoopList(kubeClient client.Client, clusterName string, pods []string, namespace string, wait bool, clear bool, clean bool) error {
cluster, err := loadCluster(kubeClient, namespace, clusterName)
if err != nil {
if k8serrors.IsNotFound(err) {
@@ -113,6 +108,11 @@ func updateCrashLoopList(kubeClient client.Client, clusterName string, processGr
return err
}

processGroups, err := getProcessGroupIDsFromPodName(cluster, pods)
if err != nil {
return err
}

patch := client.MergeFrom(cluster.DeepCopy())
if clean {
if wait {
35 changes: 17 additions & 18 deletions kubectl-fdb/cmd/buggify_crash_loop_test.go
@@ -56,7 +56,6 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {

When("running buggify crash-loop instances command", func() {
When("adding instances to crash-loop list from a cluster", func() {

type testCase struct {
Instances []string
ExpectedInstancesInCrashLoop []string
@@ -83,21 +82,21 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {
},
Entry("Adding single instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInCrashLoop: []string{"instance-1"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInCrashLoop: []string{"storage-1"},
}),
Entry("Adding multiple instances.",
testCase{
Instances: []string{"instance-1", "instance-2"},
ExpectedInstancesInCrashLoop: []string{"instance-1", "instance-2"},
Instances: []string{"test-storage-1", "test-storage-2"},
ExpectedInstancesInCrashLoop: []string{"storage-1", "storage-2"},
}),
)

When("a process group was already in crash-loop", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.CrashLoop = []string{"instance-1"}
cluster.Spec.Buggify.CrashLoop = []string{"storage-1"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
@@ -125,18 +124,18 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {
},
Entry("Adding the same instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInCrashLoop: []string{"instance-1"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInCrashLoop: []string{"storage-1"},
}),
Entry("Adding different instance.",
testCase{
Instances: []string{"instance-2"},
ExpectedInstancesInCrashLoop: []string{"instance-1", "instance-2"},
Instances: []string{"test-storage-2"},
ExpectedInstancesInCrashLoop: []string{"storage-1", "storage-2"},
}),
Entry("Adding multiple instances.",
testCase{
Instances: []string{"instance-2", "instance-3"},
ExpectedInstancesInCrashLoop: []string{"instance-1", "instance-2", "instance-3"},
Instances: []string{"test-storage-2", "test-storage-3"},
ExpectedInstancesInCrashLoop: []string{"storage-1", "storage-2", "storage-3"},
}),
)
})
@@ -146,7 +145,7 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.CrashLoop = []string{"instance-1", "instance-2", "instance-3"}
cluster.Spec.Buggify.CrashLoop = []string{"storage-1", "storage-2", "storage-3"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
@@ -174,13 +173,13 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {
},
Entry("Removing single instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInCrashLoop: []string{"instance-2", "instance-3"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInCrashLoop: []string{"storage-2", "storage-3"},
}),
Entry("Removing multiple instances.",
testCase{
Instances: []string{"instance-2", "instance-3"},
ExpectedInstancesInCrashLoop: []string{"instance-1"},
Instances: []string{"test-storage-2", "test-storage-3"},
ExpectedInstancesInCrashLoop: []string{"storage-1"},
}),
)

@@ -190,7 +189,7 @@ var _ = Describe("[plugin] buggify crash-loop instances command", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.CrashLoop = []string{"instance-1", "instance-2", "instance-3"}
cluster.Spec.Buggify.CrashLoop = []string{"storage-1", "storage-2", "storage-3"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
14 changes: 7 additions & 7 deletions kubectl-fdb/cmd/buggify_no_schedule.go
@@ -66,12 +66,7 @@ func newBuggifyNoSchedule(streams genericclioptions.IOStreams) *cobra.Command {
return err
}

processGroups, err := getProcessGroupIDsFromPod(kubeClient, cluster, args, namespace)
if err != nil {
return err
}

return updateNoScheduleList(kubeClient, cluster, processGroups, namespace, wait, clear, clean)
return updateNoScheduleList(kubeClient, cluster, args, namespace, wait, clear, clean)
},
Example: `
# Add process groups into no-schedule state for a cluster in the current namespace
@@ -105,7 +100,7 @@ kubectl fdb -n default buggify no-schedule -c cluster pod-1 pod-2
}

// updateNoScheduleList updates the no-schedule list of the cluster
func updateNoScheduleList(kubeClient client.Client, clusterName string, processGroups []string, namespace string, wait bool, clear bool, clean bool) error {
func updateNoScheduleList(kubeClient client.Client, clusterName string, pods []string, namespace string, wait bool, clear bool, clean bool) error {
cluster, err := loadCluster(kubeClient, namespace, clusterName)
if err != nil {
if k8serrors.IsNotFound(err) {
@@ -114,6 +109,11 @@ func updateNoScheduleList(kubeClient client.Client, clusterName string, processG
return err
}

processGroups, err := getProcessGroupIDsFromPodName(cluster, pods)
if err != nil {
return err
}

patch := client.MergeFrom(cluster.DeepCopy())
if clean {
if wait {
34 changes: 17 additions & 17 deletions kubectl-fdb/cmd/buggify_no_schedule_test.go
@@ -82,21 +82,21 @@ var _ = Describe("[plugin] buggify no-schedule instances command", func() {
},
Entry("Adding single instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInNoSchedule: []string{"instance-1"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInNoSchedule: []string{"storage-1"},
}),
Entry("Adding multiple instances.",
testCase{
Instances: []string{"instance-1", "instance-2"},
ExpectedInstancesInNoSchedule: []string{"instance-1", "instance-2"},
Instances: []string{"test-storage-1", "test-storage-2"},
ExpectedInstancesInNoSchedule: []string{"storage-1", "storage-2"},
}),
)

When("a process group is already in no-schedule", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.NoSchedule = []string{"instance-1"}
cluster.Spec.Buggify.NoSchedule = []string{"storage-1"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
@@ -124,18 +124,18 @@ var _ = Describe("[plugin] buggify no-schedule instances command", func() {
},
Entry("Adding the same instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInNoSchedule: []string{"instance-1"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInNoSchedule: []string{"storage-1"},
}),
Entry("Adding different instance.",
testCase{
Instances: []string{"instance-2"},
ExpectedInstancesInNoSchedule: []string{"instance-1", "instance-2"},
Instances: []string{"test-storage-2"},
ExpectedInstancesInNoSchedule: []string{"storage-1", "storage-2"},
}),
Entry("Adding multiple instances.",
testCase{
Instances: []string{"instance-2", "instance-3"},
ExpectedInstancesInNoSchedule: []string{"instance-1", "instance-2", "instance-3"},
Instances: []string{"test-storage-2", "test-storage-3"},
ExpectedInstancesInNoSchedule: []string{"storage-1", "storage-2", "storage-3"},
}),
)
})
Expand All @@ -145,7 +145,7 @@ var _ = Describe("[plugin] buggify no-schedule instances command", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.NoSchedule = []string{"instance-1", "instance-2", "instance-3"}
cluster.Spec.Buggify.NoSchedule = []string{"storage-1", "storage-2", "storage-3"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
@@ -173,13 +173,13 @@ var _ = Describe("[plugin] buggify no-schedule instances command", func() {
},
Entry("Removing single instance.",
testCase{
Instances: []string{"instance-1"},
ExpectedInstancesInNoSchedule: []string{"instance-2", "instance-3"},
Instances: []string{"test-storage-1"},
ExpectedInstancesInNoSchedule: []string{"storage-2", "storage-3"},
}),
Entry("Removing multiple instances.",
testCase{
Instances: []string{"instance-2", "instance-3"},
ExpectedInstancesInNoSchedule: []string{"instance-1"},
Instances: []string{"test-storage-2", "test-storage-3"},
ExpectedInstancesInNoSchedule: []string{"storage-1"},
}),
)

@@ -189,7 +189,7 @@ var _ = Describe("[plugin] buggify no-schedule instances command", func() {
var kubeClient client.Client

BeforeEach(func() {
cluster.Spec.Buggify.NoSchedule = []string{"instance-1", "instance-2", "instance-3"}
cluster.Spec.Buggify.NoSchedule = []string{"storage-1", "storage-2", "storage-3"}
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = fdbv1beta2.AddToScheme(scheme)
4 changes: 2 additions & 2 deletions kubectl-fdb/cmd/cordon.go
@@ -145,7 +145,7 @@ func cordonNode(kubeClient client.Client, cluster *fdbv1beta2.FoundationDBCluste
}

for _, pod := range pods.Items {
// With the field selector above this shouldn't be required but it's good to
// With the field selector above this shouldn't be required, but it's good to
// have a second check.
if pod.Spec.NodeName != node {
fmt.Printf("Pod: %s is not running on node %s will be ignored\n", pod.Name, node)
@@ -161,5 +161,5 @@ func cordonNode(kubeClient client.Client, cluster *fdbv1beta2.FoundationDBCluste
}
}

return replaceProcessGroups(kubeClient, cluster.Name, processGroups, namespace, withExclusion, false, wait, false)
return replaceProcessGroups(kubeClient, cluster.Name, processGroups, namespace, withExclusion, wait, false, true)
}
30 changes: 12 additions & 18 deletions kubectl-fdb/cmd/k8s_client.go
@@ -23,6 +23,8 @@ package cmd
import (
"bytes"
ctx "context"
"fmt"
"strings"

fdbv1beta1 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta1"
fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2"
@@ -199,29 +201,21 @@ func getAllPodsFromClusterWithCondition(kubeClient client.Client, clusterName st
return processes, nil
}

func getProcessGroupIDsFromPod(kubeClient client.Client, clusterName string, podNames []string, namespace string) ([]string, error) {
// getProcessGroupIDsFromPodName returns the process group IDs based on the cluster configuration.
func getProcessGroupIDsFromPodName(cluster *fdbv1beta2.FoundationDBCluster, podNames []string) ([]string, error) {
processGroups := make([]string, 0, len(podNames))
// Build a map to filter faster
podNameMap := map[string]struct{}{}
for _, name := range podNames {
podNameMap[name] = struct{}{}
}

cluster, err := loadCluster(kubeClient, namespace, clusterName)
if err != nil {
return processGroups, err
}
pods, err := getPodsForCluster(kubeClient, cluster, namespace)
if err != nil {
return processGroups, err
}

for _, pod := range pods.Items {
if _, ok := podNameMap[pod.Name]; !ok {
// TODO(johscheuer): We could validate if the provided process group is actually part of the cluster
for _, podName := range podNames {
if podName == "" {
continue
}

processGroups = append(processGroups, pod.Labels[cluster.GetProcessGroupIDLabel()])
if !strings.HasPrefix(podName, cluster.Name) {
return nil, fmt.Errorf("cluster name %s is not set as prefix for Pod name %s, please ensure the specified Pod is part of the cluster", cluster.Name, podName)
}

processGroups = append(processGroups, internal.GetProcessGroupIDFromPodName(cluster, podName))
}

return processGroups, nil
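For contrast: the removed version of this helper loaded the cluster, listed all of its Pods through the Kubernetes API, and read each process group ID from a Pod label, whereas the replacement works on the Pod names alone. A minimal sketch of that API-free path, with the `ProcessGroupIDPrefix` handling omitted for brevity (function and variable names here are illustrative, not the plugin's actual API):

```go
package main

import (
	"fmt"
	"strings"
)

// processGroupIDs sketches the new code path in isolation: no client-go
// calls, just string handling over the Pod names the user passed in.
func processGroupIDs(clusterName string, podNames []string) ([]string, error) {
	processGroups := make([]string, 0, len(podNames))

	for _, podName := range podNames {
		if podName == "" {
			continue
		}

		// The only validation left: the Pod name must carry the cluster
		// name as prefix, otherwise no process group ID can be derived.
		if !strings.HasPrefix(podName, clusterName) {
			return nil, fmt.Errorf("cluster name %s is not set as prefix for Pod name %s, please ensure the specified Pod is part of the cluster", clusterName, podName)
		}

		// Strip the cluster name and the joining dash, as in
		// internal.GetProcessGroupIDFromPodName.
		processGroups = append(processGroups, strings.ReplaceAll(podName, clusterName, "")[1:])
	}

	return processGroups, nil
}

func main() {
	ids, err := processGroupIDs("test", []string{"test-storage-1", "test-log-2"})
	fmt.Println(ids, err) // [storage-1 log-2] <nil>

	_, err = processGroupIDs("test", []string{"other-storage-1"})
	fmt.Println(err) // cluster name test is not set as prefix for Pod name other-storage-1, ...
}
```

The trade-off is the one the TODO above flags: a syntactically well-formed name is accepted even if no such Pod exists in the cluster, in exchange for dropping a List call per plugin invocation.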