Skip to content

Commit

Permalink
fix: allow running as root to inject chaos (#525)
Browse files Browse the repository at this point in the history
Closes #520
  • Loading branch information
lenaschoenburg authored Apr 23, 2024
2 parents af26a08 + 755f387 commit 1989c57
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 286 deletions.
10 changes: 0 additions & 10 deletions go-chaos/backend/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,6 @@ func prepareGatewayDisconnect(kubeConfigPath string, namespace string) (internal
return k8Client, nil, nil, err
}

err = k8Client.ApplyNetworkPatchOnGateway()
if err != nil {
return internal.K8Client{}, nil, nil, err
}

internal.LogVerbose("Patched deployment")

err = k8Client.AwaitReadiness()
Expand All @@ -210,11 +205,6 @@ func prepareBrokerDisconnect(kubeConfigPath string, namespace string) (internal.
return internal.K8Client{}, err
}

err = k8Client.ApplyNetworkPatch()
if err != nil {
return internal.K8Client{}, err
}

internal.LogVerbose("Patched statefulset")
return k8Client, nil
}
Expand Down
17 changes: 6 additions & 11 deletions go-chaos/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,11 +356,10 @@ type CurrentTopology struct {
}

type BrokerState struct {
Id int32
State string
Version int64
LastUpdatedAt string
Partitions []PartitionState
Id int32
State string
Version int64
Partitions []PartitionState
}

type PartitionState struct {
Expand All @@ -370,17 +369,13 @@ type PartitionState struct {
}

type LastChange struct {
Id int64
Status string
StartedAt string
CompletedAt string
Id int64
Status string
}

type TopologyChange struct {
Id int64
Status string
StartedAt string
CompletedAt string
InternalVersion int64
Completed []Operation
Pending []Operation
Expand Down
10 changes: 2 additions & 8 deletions go-chaos/cmd/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,12 @@ func Test_DescribeChangeStatusWithPending(t *testing.T) {
Version: 1,
Brokers: []BrokerState{},
LastChange: &LastChange{
Id: 2,
Status: "COMPLETED",
StartedAt: "2021-09-01T12:00:00.000Z",
CompletedAt: "2021-09-01T12:00:00.000Z",
Id: 2,
Status: "COMPLETED",
},
PendingChange: &TopologyChange{
Id: 3,
Status: "IN_PROGRESS",
StartedAt: "2021-09-01T12:00:00.000Z",
CompletedAt: "2021-09-01T12:00:00.000Z",
InternalVersion: 1,
Completed: []Operation{
{
Expand Down Expand Up @@ -82,8 +78,6 @@ func Test_DescribeChangeStatusWithoutCompleted(t *testing.T) {
PendingChange: &TopologyChange{
Id: 3,
Status: "IN_PROGRESS",
StartedAt: "2021-09-01T12:00:00.000Z",
CompletedAt: "2021-09-01T12:00:00.000Z",
InternalVersion: 1,
Completed: []Operation{
{
Expand Down
4 changes: 2 additions & 2 deletions go-chaos/cmd/stress.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func AddStressCmd(rootCmd *cobra.Command, flags *Flags) {
internal.LogInfo("Put stress on %s", pod.Name)

stressType := internal.StressType{CpuStress: flags.cpuStress, IoStress: flags.ioStress, MemStress: flags.memoryStress}
err = internal.PutStressOnPod(k8Client, flags.timeoutSec, pod.Name, stressType)
err = internal.PutStressOnPod(k8Client, flags.timeoutSec, pod.Name, "zeebe", stressType)
ensureNoError(err)
},
}
Expand All @@ -69,7 +69,7 @@ func AddStressCmd(rootCmd *cobra.Command, flags *Flags) {
internal.LogInfo("Put stress on %s", pod.Name)

stressType := internal.StressType{CpuStress: flags.cpuStress, IoStress: flags.ioStress, MemStress: flags.memoryStress}
err = internal.PutStressOnPod(k8Client, flags.timeoutSec, pod.Name, stressType)
err = internal.PutStressOnPod(k8Client, flags.timeoutSec, pod.Name, "zeebe-gateway", stressType)
ensureNoError(err)
},
}
Expand Down
163 changes: 27 additions & 136 deletions go-chaos/internal/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,150 +15,41 @@
package internal

import (
"bytes"
"context"
"errors"
"fmt"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

func (c K8Client) ApplyNetworkPatch() error {

statefulSet, err := c.GetZeebeStatefulSet()
if err != nil {
return err
}

// We need to run the container with root to allow install tooling and give it network admin capabilities
patch := []byte(`{
"spec":{
"template":{
"spec":{
"containers":[
{
"name": "zeebe",
"securityContext":{
"runAsUser": 0,
"capabilities":{
"add":["NET_ADMIN"]
}
}
}]
}
}
}
}`)

_, err = c.Clientset.AppsV1().StatefulSets(c.GetCurrentNamespace()).Patch(context.TODO(), statefulSet.Name, types.StrategicMergePatchType, patch, metav1.PatchOptions{})
return err
}

func (c K8Client) ApplyNetworkPatchOnGateway() error {

deployment, err := c.getGatewayDeployment()
if err != nil {
return err
}

// We need to run the container with root to allow install tooling and give it network admin capabilities
patch := []byte(`{
"spec":{
"template":{
"spec":{
"containers":[
{
"name": "zeebe-gateway",
"securityContext":{
"runAsUser": 0,
"capabilities":{
"add":["NET_ADMIN"]
}
}
}]
}
}
}
}`)

_, err = c.Clientset.AppsV1().Deployments(c.GetCurrentNamespace()).Patch(context.TODO(), deployment.Name, types.StrategicMergePatchType, patch, metav1.PatchOptions{})
return err
}

func MakeIpUnreachableForPod(k8Client K8Client, podIp string, podName string) error {
// We try to reduce the system output in order to not break the execution. There is a limit for the sout for exec,
// for more details see remotecommand.StreamOptions

// the -qq flag makes the tool less noisy, remove it to get more output
err := k8Client.ExecuteCmdOnPod([]string{"apt", "-qq", "update"}, podName)
if err != nil {
return err
}

// the -qq flag makes the tool less noisy, remove it to get more output
err = k8Client.ExecuteCmdOnPod([]string{"apt", "-qq", "install", "-y", "iproute2"}, podName)
if err != nil {
return err
}

// we use replace to not break the execution, since add will return an exit code > 0 if the route exist
err = k8Client.ExecuteCmdOnPod([]string{"ip", "route", "replace", "unreachable", podIp}, podName)
if err != nil {
return err
}

return nil
cmd := []string{"ip", "route", "replace", "unreachable", podIp}
cmdWithSetup := []string{"sh", "-c", "apt update && apt install -y iproute2 && " + strings.Join(cmd, " ")}
var containerName string
if strings.Contains(podName, "gateway") {
containerName = "zeebe-gateway"
} else {
containerName = "zeebe"
}
return k8Client.ExecuteCommandViaDebugContainer(podName, containerName, "camunda/zeebe", cmdWithSetup)
}

func MakeIpReachableForPod(k8Client K8Client, podName string) error {
// We try to reduce the system output in order to not break the execution. There is a limit for the sout for exec,
// for more details see remotecommand.StreamOptions

// we use replace to not break the execution, since add will return an exit code > 0 if the route exist
err := k8Client.ExecuteCmdOnPod([]string{"sh", "-c", "command -v ip"}, podName)

if err != nil {
if strings.Contains(err.Error(), "exit code 127") {
return errors.New("Execution exited with exit code 127 (Command not found). It is likely that the broker was not disconnected or restarted in between.")
}
return err
}

var buf bytes.Buffer
err = k8Client.ExecuteCmdOnPodWriteIntoOutput([]string{"sh", "-c", "ip route | grep -m 1 unreachable"}, podName, &buf)

if err != nil {
if strings.Contains(err.Error(), "exit code 1") {
return errors.New("Execution exited with exit code 1 (ip route not found). It is likely that the broker was not disconnected or restarted in between.")
}
return err
}

err = k8Client.ExecuteCmdOnPodWriteIntoOutput([]string{"sh", "-c", fmt.Sprintf("ip route del %s", strings.TrimSpace(buf.String()))}, podName, &buf)

if err != nil {
return err
}

return nil
cmd := "ip route del $(ip route | grep -m 1 unreachable)"
cmdWithSetup := []string{"sh", "-c", "apt update && apt install -y iproute2 && " + cmd}
var containerName string
if strings.Contains(podName, "gateway") {
containerName = "zeebe-gateway"
} else {
containerName = "zeebe"
}
return k8Client.ExecuteCommandViaDebugContainer(podName, containerName, "camunda/zeebe", cmdWithSetup)
}

func MakeIpReachable(k8Client K8Client, podName string, ip string) error {
// We try to reduce the system output in order to not break the execution. There is a limit for the sout for exec,
// for more details see remotecommand.StreamOptions

err := k8Client.ExecuteCmdOnPod([]string{"sh", "-c", "command -v ip"}, podName)
if err != nil && strings.Contains(err.Error(), "exit code 127") {
return errors.New("Execution exited with exit code 127 (Command not found). It is likely that the broker was not disconnected or restarted in between.")
}

var buf bytes.Buffer
err = k8Client.ExecuteCmdOnPodWriteIntoOutput([]string{"sh", "-c", fmt.Sprintf("ip route del unreachable %s", ip)}, podName, &buf)
if err != nil {
return err
}

return nil
cmd := "ip route del unreachable " + ip
cmdWithSetup := []string{"sh", "-c", "apt update && apt install -y iproute2 && " + cmd}
var containerName string
if strings.Contains(podName, "gateway") {
containerName = "zeebe-gateway"
} else {
containerName = "zeebe"
}
return k8Client.ExecuteCommandViaDebugContainer(podName, containerName, "camunda/zeebe", cmdWithSetup)
}
48 changes: 0 additions & 48 deletions go-chaos/internal/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,51 +13,3 @@
// limitations under the License.

package internal

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func Test_ShouldApplyNetworkPatchOnStatefulSet(t *testing.T) {
// given
k8Client := CreateFakeClient()
k8Client.createSaaSCRD(t)
k8Client.CreateStatefulSetWithLabelsAndName(t, &metav1.LabelSelector{}, "zeebe")

// when
err := k8Client.ApplyNetworkPatch()

// then
require.NoError(t, err)

statefulSet, err := k8Client.GetZeebeStatefulSet()
require.NoError(t, err)

require.NotNil(t, statefulSet)
assert.Equal(t, v1.Capability("NET_ADMIN"), statefulSet.Spec.Template.Spec.Containers[0].SecurityContext.Capabilities.Add[0], "Expected to add capability to statefulset")
}

func Test_ShouldApplyNetworkPatchOnDeployment(t *testing.T) {
// given
k8Client := CreateFakeClient()
selector, err := metav1.ParseToLabelSelector(getSelfManagedGatewayLabels())
require.NoError(t, err)
k8Client.CreateDeploymentWithLabelsAndName(t, selector, "zeebe-gateway")

// when
err = k8Client.ApplyNetworkPatchOnGateway()

// then
require.NoError(t, err)

deployment, err := k8Client.getGatewayDeployment()
require.NoError(t, err)

require.NotNil(t, deployment)
assert.Equal(t, v1.Capability("NET_ADMIN"), deployment.Spec.Template.Spec.Containers[0].SecurityContext.Capabilities.Add[0], "Expected to add capability to deployment")
}
30 changes: 30 additions & 0 deletions go-chaos/internal/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io"
"k8s.io/utils/ptr"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -302,6 +303,35 @@ func (c K8Client) createPortForwardUrl(names []string) *url.URL {
return portForwardCreateURL
}

func (c K8Client) ExecuteCommandViaDebugContainer(podName string, containerName string, debugImage string, cmd []string) error {
pod, err := c.Clientset.CoreV1().Pods(c.GetCurrentNamespace()).Get(context.TODO(), podName, metav1.GetOptions{})
if err != nil {
return err
}
name := "debug-" + rand.String(6)
debugContainer := v1.EphemeralContainer{
EphemeralContainerCommon: v1.EphemeralContainerCommon{
Name: name,
Image: debugImage,
Command: cmd,
SecurityContext: &v1.SecurityContext{
RunAsNonRoot: ptr.To(false),
RunAsUser: ptr.To(int64(0)),
RunAsGroup: ptr.To(int64(0)),
Privileged: ptr.To(true),
},
},
TargetContainerName: containerName,
}
pod.Spec.EphemeralContainers = append(pod.Spec.EphemeralContainers, debugContainer)
_, err = c.Clientset.CoreV1().Pods(c.GetCurrentNamespace()).UpdateEphemeralContainers(context.TODO(), pod.Name, pod, metav1.UpdateOptions{})
if err != nil {
return err
}
LogVerbose("Debug container %s is running command %v", name, cmd)
return nil
}

func (c K8Client) ExecuteCmdOnPod(cmd []string, pod string) error {
if Verbosity {
return c.ExecuteCmdOnPodWriteIntoOutput(cmd, pod, os.Stdout)
Expand Down
Loading

0 comments on commit 1989c57

Please sign in to comment.