Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding support for container name in plugin's buggify crashloop command #1515

Merged
merged 4 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions api/v1beta2/foundationdbcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2206,6 +2206,37 @@ func (cluster *FoundationDBCluster) AddProcessGroupsToCrashLoopList(processGroup
}
}

// AddProcessGroupsToCrashLoopContainerList adds the provided process group IDs to the crash-loop
// container list entry of the given container.
//
// If the cluster already has an entry for containerName (even one whose target list is empty), the
// new IDs are appended to the first such entry; otherwise a new entry is created. A process group ID
// that is already targeted by any entry of that container, or that is repeated in processGroupIDs,
// is only added once. When there is nothing new to add the spec is left untouched.
func (cluster *FoundationDBCluster) AddProcessGroupsToCrashLoopContainerList(processGroupIDs []ProcessGroupID, containerName string) {
	containers := cluster.Spec.Buggify.CrashLoopContainers

	// Locate the first entry of this container and collect every ID already targeted by it
	// (or by any duplicate entry carrying the same container name).
	entryIdx := -1
	known := make(map[ProcessGroupID]struct{})
	for idx := range containers {
		if containers[idx].ContainerName != containerName {
			continue
		}
		if entryIdx < 0 {
			entryIdx = idx
		}
		for _, processGroupID := range containers[idx].Targets {
			known[processGroupID] = struct{}{}
		}
	}

	// Build a fresh slice of the IDs that are really new, so that the caller's slice is never
	// stored in (and later mutated through) the cluster spec.
	missing := make([]ProcessGroupID, 0, len(processGroupIDs))
	for _, processGroupID := range processGroupIDs {
		if _, ok := known[processGroupID]; ok {
			continue
		}
		known[processGroupID] = struct{}{}
		missing = append(missing, processGroupID)
	}

	if len(missing) == 0 {
		return
	}

	if entryIdx < 0 {
		cluster.Spec.Buggify.CrashLoopContainers = append(containers, CrashLoopContainerObject{
			ContainerName: containerName,
			Targets:       missing,
		})
		return
	}

	// Reuse the existing entry instead of appending a second one with the same container name.
	containers[entryIdx].Targets = append(containers[entryIdx].Targets, missing...)
}

// RemoveProcessGroupsFromCrashLoopList removes the provided process group IDs from the crash-loop list.
func (cluster *FoundationDBCluster) RemoveProcessGroupsFromCrashLoopList(processGroupIDs []ProcessGroupID) {
processGroupIDsToRemove := make(map[ProcessGroupID]None)
Expand All @@ -2224,6 +2255,32 @@ func (cluster *FoundationDBCluster) RemoveProcessGroupsFromCrashLoopList(process
cluster.Spec.Buggify.CrashLoop = cluster.Spec.Buggify.CrashLoop[:idx]
}

// RemoveProcessGroupsFromCrashLoopContainerList removes the provided process group IDs from the
// crash-loop container list of the given container.
//
// Every entry whose ContainerName equals containerName is filtered, so IDs are also removed from
// duplicate entries of the same container. Entries of other containers are not touched, and an
// entry that ends up without targets is kept with an empty target list. Calling the method without
// any process group IDs is a no-op.
func (cluster *FoundationDBCluster) RemoveProcessGroupsFromCrashLoopContainerList(processGroupIDs []ProcessGroupID, containerName string) {
	if len(processGroupIDs) == 0 {
		return
	}

	processGroupIDsToRemove := make(map[ProcessGroupID]None, len(processGroupIDs))
	for _, processGroupID := range processGroupIDs {
		processGroupIDsToRemove[processGroupID] = None{}
	}

	containers := cluster.Spec.Buggify.CrashLoopContainers
	for idx := range containers {
		if containers[idx].ContainerName != containerName {
			continue
		}

		// Filter into a new slice so the previous backing array is left unmodified.
		remaining := make([]ProcessGroupID, 0, len(containers[idx].Targets))
		for _, processGroupID := range containers[idx].Targets {
			if _, remove := processGroupIDsToRemove[processGroupID]; remove {
				continue
			}
			remaining = append(remaining, processGroupID)
		}
		containers[idx].Targets = remaining
	}
}

// AddProcessGroupsToRemovalWithoutExclusionList adds the provided process group IDs to the remove without exclusion list.
// If a process group ID is already present on that list it won't be added a second time.
func (cluster *FoundationDBCluster) AddProcessGroupsToRemovalWithoutExclusionList(processGroupIDs []ProcessGroupID) {
Expand Down
12 changes: 6 additions & 6 deletions kubectl-fdb/cmd/buggify.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ func newBuggifyCmd(streams genericclioptions.IOStreams) *cobra.Command {
Example: `
kubectl fdb -n <namespace> buggify <option> -c <cluster> pod-1 pod-2

# Add process groups into crash loop state for a cluster in the current namespace
kubectl fdb buggify crash-loop -c cluster pod-1 pod-2
# Add process groups into crash loop state for a cluster in the current namespace with container name
kubectl fdb buggify crash-loop -c cluster --container-name container-name pod-1 pod-2

# Remove process groups from crash loop state from a cluster in the current namespace
kubectl fdb buggify crash-loop --clear -c cluster pod-1 pod-2
# Remove process groups from crash loop state from a cluster in the current namespace with container name
kubectl fdb buggify crash-loop --clear -c cluster --container-name container-name pod-1 pod-2

# Clean crash loop list of a cluster in the current namespace
kubectl fdb buggify crash-loop --clean -c cluster
# Clean crash loop list of a cluster in the current namespace with container name
kubectl fdb buggify crash-loop --clean -c cluster --container-name container-name

# Add process groups into no-schedule state for a cluster in the current namespace
kubectl fdb buggify no-schedule -c cluster pod-1 pod-2
Expand Down
49 changes: 31 additions & 18 deletions kubectl-fdb/cmd/buggify_crash_loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package cmd
import (
ctx "context"
"fmt"
fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2"
"log"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -55,6 +56,10 @@ func newBuggifyCrashLoop(streams genericclioptions.IOStreams) *cobra.Command {
if err != nil {
return err
}
containerName, err := cmd.Flags().GetString("container-name")
if err != nil {
return err
}

kubeClient, err := getKubeClient(o)
if err != nil {
Expand All @@ -66,23 +71,22 @@ func newBuggifyCrashLoop(streams genericclioptions.IOStreams) *cobra.Command {
return err
}

return updateCrashLoopList(kubeClient, cluster, args, namespace, wait, clear, clean)
return updateCrashLoopContainerList(kubeClient, cluster, containerName, args, namespace, wait, clear, clean)
},
Example: `
# Add process groups into crash loop state for a cluster in the current namespace
kubectl fdb buggify crash-loop -c cluster pod-1 pod-2

# Remove process groups from crash loop state from a cluster in the current namespace
kubectl fdb buggify crash-loop --clear -c cluster pod-1 pod-2
# Add process groups into crash loop state for a cluster in the current namespace with container name
kubectl fdb buggify crash-loop -c cluster --container-name container-name pod-1 pod-2

# Clean crash loop list of a cluster in the current namespace
kubectl fdb buggify crash-loop --clean -c cluster
# Remove process groups from crash loop state from a cluster in the current namespace with container name
kubectl fdb buggify crash-loop --clear -c cluster --container-name container-name pod-1 pod-2

# Add process groups into crash loop state for a cluster in the namespace default
kubectl fdb -n default buggify crash-loop -c cluster pod-1 pod-2
# Clean crash loop list of a cluster in the current namespace with container name
09harsh marked this conversation as resolved.
Show resolved Hide resolved
kubectl fdb buggify crash-loop --clean -c cluster --container-name container-name
`,
}
cmd.Flags().StringP("fdb-cluster", "c", "", "updates the crash-loop list in the provided cluster.")
cmd.Flags().String("container-name", fdbv1beta2.MainContainerName, "container name to which we want to add/remove process groups.")
cmd.Flags().Bool("clear", false, "removes the process groups from the crash-loop list.")
cmd.Flags().Bool("clean", false, "removes all process groups from the crash-loop list.")
err := cmd.MarkFlagRequired("fdb-cluster")
Expand All @@ -98,8 +102,8 @@ kubectl fdb -n default buggify crash-loop -c cluster pod-1 pod-2
return cmd
}

// updateCrashLoopList updates the crash-loop list of the cluster
func updateCrashLoopList(kubeClient client.Client, clusterName string, pods []string, namespace string, wait bool, clear bool, clean bool) error {
// updateCrashLoopContainerList updates the crash-loop container-list of the cluster
func updateCrashLoopContainerList(kubeClient client.Client, clusterName string, containerName string, pods []string, namespace string, wait bool, clear bool, clean bool) error {
cluster, err := loadCluster(kubeClient, namespace, clusterName)
if err != nil {
if k8serrors.IsNotFound(err) {
Expand All @@ -116,34 +120,43 @@ func updateCrashLoopList(kubeClient client.Client, clusterName string, pods []st
patch := client.MergeFrom(cluster.DeepCopy())
if clean {
if wait {
if !confirmAction(fmt.Sprintf("Clearing crash-loop list from cluster %s/%s", namespace, clusterName)) {
if !confirmAction(fmt.Sprintf("Clearing crash-loop list for %s from cluster %s/%s", containerName, namespace, clusterName)) {
return fmt.Errorf("user aborted the removal")
}
}
cluster.Spec.Buggify.CrashLoop = nil
containerIdx := 0
for _, crashLoopContainerObj := range cluster.Spec.Buggify.CrashLoopContainers {
if crashLoopContainerObj.ContainerName != containerName {
containerIdx++
continue
}
crashLoopContainerObj.Targets = nil
cluster.Spec.Buggify.CrashLoopContainers[containerIdx] = crashLoopContainerObj
break
}
return kubeClient.Patch(ctx.TODO(), cluster, patch)
}

if len(processGroupIDs) == 0 {
return fmt.Errorf("please provide atleast one pod")
return fmt.Errorf("please provide at least one Pod")
}

if wait {
if clear {
if !confirmAction(fmt.Sprintf("Removing %v to crash-loop from cluster %s/%s", processGroupIDs, namespace, clusterName)) {
if !confirmAction(fmt.Sprintf("Removing %v from container: %s in crash-loop container list of the cluster %s/%s", processGroupIDs, containerName, namespace, clusterName)) {
return fmt.Errorf("user aborted the removal")
}
} else {
if !confirmAction(fmt.Sprintf("Adding %v to crash-loop from cluster %s/%s", processGroupIDs, namespace, clusterName)) {
if !confirmAction(fmt.Sprintf("Adding %v to container: %s in crash-loop container list of the cluster %s/%s", processGroupIDs, containerName, namespace, clusterName)) {
return fmt.Errorf("user aborted the removal")
}
}
}

if clear {
cluster.RemoveProcessGroupsFromCrashLoopList(processGroupIDs)
cluster.RemoveProcessGroupsFromCrashLoopContainerList(processGroupIDs, containerName)
} else {
cluster.AddProcessGroupsToCrashLoopList(processGroupIDs)
cluster.AddProcessGroupsToCrashLoopContainerList(processGroupIDs, containerName)
}

return kubeClient.Patch(ctx.TODO(), cluster, patch)
Expand Down
Loading