-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: verify cluster can survive dataloss of one broker at a time. (#275)
After a broker has recovered from the loss of its disk, the cluster should be able to survive another broker's disk loss. After a series of disk losses, one broker at a time, the cluster should not suffer data loss. We verify this by creating instances of the process that was deployed before the disk loss. In this experiment we don't have to call `zbchaos dataloss prepare` because there is no need to add init containers. Since we are only deleting the data of one broker at a time, the pod can be restarted immediately. Related to #4
- Loading branch information
Showing
1 changed file
with
145 additions
and
0 deletions.
There are no files selected for viewing
145 changes: 145 additions & 0 deletions
145
...aos/internal/chaos-experiments/camunda-cloud/production-s/broker-dataloss/experiment.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
{
  "version": "0.1.0",
  "title": "Zeebe dataloss experiment",
  "description": "Zeebe should be able to handle data loss of one broker at a time.",
  "contributions": {
    "reliability": "high",
    "availability": "high"
  },
  "steady-state-hypothesis": {
    "title": "Zeebe is alive",
    "probes": [
      {
        "name": "All pods should be ready",
        "type": "probe",
        "tolerance": 0,
        "provider": {
          "type": "process",
          "path": "zbchaos",
          "arguments": ["verify", "readiness"],
          "timeout": 900
        }
      }
    ]
  },
  "method": [
    {
      "type": "action",
      "name": "Deploy process",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["deploy", "process"],
        "timeout": 900
      }
    },
    {
      "type": "action",
      "name": "Delete data of broker 0 and restart the pod",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["dataloss", "delete", "--nodeId=0"]
      },
      "pauses": {
        "after": 60
      }
    },
    {
      "type": "probe",
      "name": "Broker 0 can recover after data loss",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "readiness"],
        "timeout": 900
      }
    },
    {
      "type": "action",
      "name": "Delete data of broker 1 and restart the pod",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["dataloss", "delete", "--nodeId=1"]
      },
      "pauses": {
        "after": 60
      }
    },
    {
      "type": "probe",
      "name": "Broker 1 can recover after data loss",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "readiness"],
        "timeout": 900
      }
    },
    {
      "type": "action",
      "name": "Delete data of broker 2 and restart the pod",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["dataloss", "delete", "--nodeId=2"]
      },
      "pauses": {
        "after": 60
      }
    },
    {
      "type": "probe",
      "name": "Broker 2 can recover after data loss",
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "readiness"],
        "timeout": 900
      }
    },
    {
      "name": "There is no data loss. Should be able to create process instances on partition 1",
      "type": "probe",
      "tolerance": 0,
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "instance-creation", "--partitionId=1"],
        "timeout": 900
      }
    },
    {
      "name": "There is no data loss. Should be able to create process instances on partition 2",
      "type": "probe",
      "tolerance": 0,
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "instance-creation", "--partitionId=2"],
        "timeout": 900
      }
    },
    {
      "name": "There is no data loss. Should be able to create process instances on partition 3",
      "type": "probe",
      "tolerance": 0,
      "provider": {
        "type": "process",
        "path": "zbchaos",
        "arguments": ["verify", "instance-creation", "--partitionId=3"],
        "timeout": 900
      }
    }
  ],
  "rollbacks": []
}