From 5609eddb7bd2eafd02e0f235ac211ea42d6ace55 Mon Sep 17 00:00:00 2001 From: Christopher Zell Date: Thu, 8 Dec 2022 13:46:42 +0100 Subject: [PATCH 1/2] fix: handle worker deployment in self-managed env In self-managed env the zeebe gateway service is called differently, based on the helm release name. The release name is in our setups normally also the namespace name, which we use here. If the worker is already deployed we update the deployment, instead of failing. Added missing tests for it. --- go-chaos/internal/deployment.go | 23 ++++++++++++++++++ go-chaos/internal/deployment_test.go | 36 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/go-chaos/internal/deployment.go b/go-chaos/internal/deployment.go index 4313d2ea9..172dd22df 100644 --- a/go-chaos/internal/deployment.go +++ b/go-chaos/internal/deployment.go @@ -20,6 +20,7 @@ import ( "embed" "errors" "fmt" + "strings" v12 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -64,6 +65,28 @@ func (c K8Client) CreateWorkerDeployment() error { return err } + if !c.SaaSEnv { + // We are in self-managed environment + // We have to update the service url such that our workers can connect + // We expect that the used helm release name is == to the namespace name + + // JAVA_OPTIONS + envVar := deployment.Spec.Template.Spec.Containers[0].Env[0] + envVar.Value = strings.Replace(envVar.Value, "zeebe-service:26500", fmt.Sprintf("%s-zeebe-gateway:26500", c.GetCurrentNamespace()), 1) + deployment.Spec.Template.Spec.Containers[0].Env[0] = envVar + } + _, err = c.Clientset.AppsV1().Deployments(c.GetCurrentNamespace()).Create(context.TODO(), deployment, metav1.CreateOptions{}) + + if err != nil { + if err.Error() == "deployments.apps \"worker\" already exists" { + LogInfo("Workers have already deployed, update deployment.") + _, err = c.Clientset.AppsV1().Deployments(c.GetCurrentNamespace()).Update(context.TODO(), deployment, metav1.UpdateOptions{}) + if err != nil { + return err + } + return nil + } + } return err } diff --git a/go-chaos/internal/deployment_test.go b/go-chaos/internal/deployment_test.go index 32b3a8379..58eb795b1 100644 --- a/go-chaos/internal/deployment_test.go +++ b/go-chaos/internal/deployment_test.go @@ -112,4 +112,40 @@ func Test_ShouldDeployWorkerDeployment(t *testing.T) { assert.Equal(t, 1, len(deploymentList.Items)) assert.Equal(t, "worker", deploymentList.Items[0].Name) + assert.Contains(t, deploymentList.Items[0].Spec.Template.Spec.Containers[0].Env[0].Value, "-Dapp.brokerUrl=testNamespace-zeebe-gateway:26500") +} + +func Test_ShouldNotReturnErrorWhenWorkersAlreadyDeployed(t *testing.T) { + // given + k8Client := CreateFakeClient() + _ = k8Client.CreateWorkerDeployment() + + // when + err := k8Client.CreateWorkerDeployment() + + // then + require.NoError(t, err) + deploymentList, err := k8Client.Clientset.AppsV1().Deployments(k8Client.GetCurrentNamespace()).List(context.TODO(), metav1.ListOptions{}) + require.NoError(t, err) + + assert.Equal(t, 1, len(deploymentList.Items)) + assert.Equal(t, "worker", deploymentList.Items[0].Name) +} + +func Test_ShouldDeployWorkerInSaas(t *testing.T) { + // given + k8Client := CreateFakeClient() + k8Client.createSaaSCRD(t) + + // when + err := k8Client.CreateWorkerDeployment() + + // then + require.NoError(t, err) + deploymentList, err := k8Client.Clientset.AppsV1().Deployments(k8Client.GetCurrentNamespace()).List(context.TODO(), metav1.ListOptions{}) + require.NoError(t, err) + + assert.Equal(t, 1, len(deploymentList.Items)) + assert.Equal(t, "worker", deploymentList.Items[0].Name) + assert.Contains(t, deploymentList.Items[0].Spec.Template.Spec.Containers[0].Env[0].Value, "-Dapp.brokerUrl=zeebe-service:26500") } From e4887b9e2f6e3285f37b970d2aa9335e408663c8 Mon Sep 17 00:00:00 2001 From: Christopher Zell Date: Thu, 8 Dec 2022 13:48:03 +0100 Subject: [PATCH 2/2] feat: migrate worker restart --- .../worker-restart/experiment.json | 63 ++++++++++++++++--- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/production-s/worker-restart/experiment.json b/go-chaos/internal/chaos-experiments/camunda-cloud/production-s/worker-restart/experiment.json index 91cc72f26..13454f1a7 100644 --- a/go-chaos/internal/chaos-experiments/camunda-cloud/production-s/worker-restart/experiment.json +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/production-s/worker-restart/experiment.json @@ -15,18 +15,30 @@ "tolerance": 0, "provider": { "type": "process", - "path": "verify-readiness.sh", + "path": "zbchaos", + "arguments": ["verify", "readiness"], "timeout": 900 } }, { - "name": "Should be able to create a process and await the result", + "name": "Can deploy process model", "type": "probe", "tolerance": 0, "provider": { "type": "process", - "path": "await-processes-with-result.sh", - "arguments": "1", + "path": "zbchaos", + "arguments": ["deploy", "process"], + "timeout": 900 + } + }, + { + "name": "Should be able to create process instances on partition 1", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "instance-creation", "--partitionId", "1"], "timeout": 900 } } @@ -34,17 +46,54 @@ }, "method": [ { - "type": "action", - "name": "Restart worker pod", + "name": "Deploy Workers", + "type": "probe", "tolerance": 0, "provider": { "type": "process", - "path": "terminate-workers.sh", + "path": "zbchaos", + "arguments": ["deploy", "worker"], "timeout": 900 }, "pauses": { "after": 5 } + }, + { + "name": "Should be able to create a process and await the result", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "instance-creation", "--partitionId", "1", "--awaitResult"], + "timeout": 900 + } + }, + { + "name": "Restart Workers", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["restart", "worker"], + "timeout": 900 + }, + "pauses": { + "after": 5 + } + }, + { + "name": "Should be able to create a process and await the result", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "instance-creation", "--partitionId", "1", "--awaitResult"], + "timeout": 900 + } } ], "rollbacks": []