Skip to content

Commit

Permalink
Migrate worker restart experiment (#281)
Browse files Browse the repository at this point in the history
First fixed an issue with the worker deployments:

In a self-managed environment the Zeebe gateway service is named differently,
based on the Helm release name.
In our setups the release name is normally also the namespace name,
which we use here.

If the worker is already deployed we update the deployment, instead of
failing.

+ Added missing tests for it.

Migrated the worker restart experiment related to #237 

Similar to the other migrated experiments, I followed the same approach as
described in #268


> The experiment was executed and verified via the integration test
against a self-managed cluster.
> 
> I moved the experiment into the chaos-experiments/camunda-cloud/test/
folder and migrated it. With that approach I was able to execute the
experiment with eze and run it against my self-managed zell-chaos Zeebe
cluster.



----------


This is, by the way, the LAST production-s experiment to migrate 🎉
  • Loading branch information
ChrisKujawa authored Dec 8, 2022
2 parents 019dec4 + e4887b9 commit 9861367
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,85 @@
"tolerance": 0,
"provider": {
"type": "process",
"path": "verify-readiness.sh",
"path": "zbchaos",
"arguments": ["verify", "readiness"],
"timeout": 900
}
},
{
"name": "Should be able to create a process and await the result",
"name": "Can deploy process model",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "await-processes-with-result.sh",
"arguments": "1",
"path": "zbchaos",
"arguments": ["deploy", "process"],
"timeout": 900
}
},
{
"name": "Should be able to create process instances on partition 1",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "instance-creation", "--partitionId", "1"],
"timeout": 900
}
}
]
},
"method": [
{
"type": "action",
"name": "Restart worker pod",
"name": "Deploy Workers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "terminate-workers.sh",
"path": "zbchaos",
"arguments": ["deploy", "worker"],
"timeout": 900
},
"pauses": {
"after": 5
}
},
{
"name": "Should be able to create a process and await the result",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "instance-creation", "--partitionId", "1", "--awaitResult"],
"timeout": 900
}
},
{
"name": "Restart Workers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["restart", "worker"],
"timeout": 900
},
"pauses": {
"after": 5
}
},
{
"name": "Should be able to create a process and await the result",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "instance-creation", "--partitionId", "1", "--awaitResult"],
"timeout": 900
}
}
],
"rollbacks": []
Expand Down
23 changes: 23 additions & 0 deletions go-chaos/internal/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"embed"
"errors"
"fmt"
"strings"

v12 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -64,6 +65,28 @@ func (c K8Client) CreateWorkerDeployment() error {
return err
}

if !c.SaaSEnv {
// We are in self-managed environment
// We have to update the service url such that our workers can connect
// We expect that the used helm release name is == to the namespace name

// JAVA_OPTIONS
envVar := deployment.Spec.Template.Spec.Containers[0].Env[0]
envVar.Value = strings.Replace(envVar.Value, "zeebe-service:26500", fmt.Sprintf("%s-zeebe-gateway:26500", c.GetCurrentNamespace()), 1)
deployment.Spec.Template.Spec.Containers[0].Env[0] = envVar
}

_, err = c.Clientset.AppsV1().Deployments(c.GetCurrentNamespace()).Create(context.TODO(), deployment, metav1.CreateOptions{})

if err != nil {
if err.Error() == "deployments.apps \"worker\" already exists" {
LogInfo("Workers have already deployed, update deployment.")
_, err = c.Clientset.AppsV1().Deployments(c.GetCurrentNamespace()).Update(context.TODO(), deployment, metav1.UpdateOptions{})
if err != nil {
return err
}
return nil
}
}
return err
}
36 changes: 36 additions & 0 deletions go-chaos/internal/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,40 @@ func Test_ShouldDeployWorkerDeployment(t *testing.T) {

assert.Equal(t, 1, len(deploymentList.Items))
assert.Equal(t, "worker", deploymentList.Items[0].Name)
assert.Contains(t, deploymentList.Items[0].Spec.Template.Spec.Containers[0].Env[0].Value, "-Dapp.brokerUrl=testNamespace-zeebe-gateway:26500")
}

// Test_ShouldNotReturnErrorWhenWorkersAlreadyDeployed verifies that calling
// CreateWorkerDeployment a second time does not return an error and that
// exactly one deployment named "worker" is listed in the current namespace
// afterwards.
func Test_ShouldNotReturnErrorWhenWorkersAlreadyDeployed(t *testing.T) {
	// given
	k8Client := CreateFakeClient()
	// The first deployment is the precondition of this test; fail fast if it
	// does not succeed instead of silently discarding the error.
	require.NoError(t, k8Client.CreateWorkerDeployment())

	// when
	err := k8Client.CreateWorkerDeployment()

	// then
	require.NoError(t, err)
	deploymentList, err := k8Client.Clientset.AppsV1().Deployments(k8Client.GetCurrentNamespace()).List(context.TODO(), metav1.ListOptions{})
	require.NoError(t, err)

	// Use require (fatal) for the length check so that the index access below
	// cannot panic on an empty list.
	require.Len(t, deploymentList.Items, 1)
	assert.Equal(t, "worker", deploymentList.Items[0].Name)
}

// Test_ShouldDeployWorkerInSaas verifies that, after createSaaSCRD has been
// called on the fake client, CreateWorkerDeployment creates a single "worker"
// deployment whose first container env var still points at
// "zeebe-service:26500".
func Test_ShouldDeployWorkerInSaas(t *testing.T) {
	// given
	k8Client := CreateFakeClient()
	// NOTE(review): createSaaSCRD is defined elsewhere; presumably it makes the
	// client detect a SaaS environment — confirm against its implementation.
	k8Client.createSaaSCRD(t)

	// when
	err := k8Client.CreateWorkerDeployment()

	// then
	require.NoError(t, err)
	deploymentList, err := k8Client.Clientset.AppsV1().Deployments(k8Client.GetCurrentNamespace()).List(context.TODO(), metav1.ListOptions{})
	require.NoError(t, err)

	// Use require (fatal) for the length check so that the index accesses
	// below cannot panic on an empty list.
	require.Len(t, deploymentList.Items, 1)
	worker := deploymentList.Items[0]
	assert.Equal(t, "worker", worker.Name)
	assert.Contains(t, worker.Spec.Template.Spec.Containers[0].Env[0].Value, "-Dapp.brokerUrl=zeebe-service:26500")
}

0 comments on commit 9861367

Please sign in to comment.