From 7c922018d815a638146b744a4cbbef8423bb758c Mon Sep 17 00:00:00 2001 From: "liheng.zms" Date: Thu, 28 Jul 2022 17:54:08 +0800 Subject: [PATCH] pod probe marker proposal Signed-off-by: liheng.zms --- docs/proposals/20220728-pod-probe-marker.md | 195 ++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 docs/proposals/20220728-pod-probe-marker.md diff --git a/docs/proposals/20220728-pod-probe-marker.md b/docs/proposals/20220728-pod-probe-marker.md new file mode 100644 index 0000000000..3ccc20d8c4 --- /dev/null +++ b/docs/proposals/20220728-pod-probe-marker.md @@ -0,0 +1,195 @@ +--- +title: PodProbeMarker +authors: +- "@zmberg" +reviewers: +- "@furykerry" +- "@FillZpp" +creation-date: 2022-08-09 +last-updated: 2022-08-24 +status: implementable +--- + +# Pod Probe Marker + +## Table of Contents + +A table of contents is helpful for quickly jumping to sections of a proposal and for highlighting +any additional information provided beyond the standard proposal template. +[Tools for generating](https://github.com/ekalinin/github-markdown-toc) a table of contents from markdown are available. + +- [Pod Probe Marker](#pod-probe-marker) +- [Table of Contents](#table-of-contents) +- [Motivation](#motivation) +- [Proposal](#proposal) + - [API Definition](#api-definition) + - [Implementation](#implementation) + - [Relationship With Startup Probe](#relationship-with-startup-probe) + +## Motivation +Kubernetes provides two probes by default for Pod lifecycle management: +- **Readiness Probe** is used to determine whether the business container is ready to respond to requests, and if it fails, the Pod will be removed from the Service Endpoints. +- **Liveness Probe** is used to determine the health status of the container, and if it fails, Kubelet will restart the Container. + +**So the Probe capabilities provided by Kubernetes are limited to specific semantics and behavior. 
In addition, there are some business applications that need to customize the semantics and behavior of the Probe**, such as: +- GameServer defines an Idle Probe to determine whether the current Pod is hosting a game match; if there is no match, the Pod can be taken offline with priority for cost optimization +- Operator defines a Master-Slave Probe to determine the role of the current Pod (master or slave), and upgrades the Slave node first + +## Proposal +OpenKruise provides the ability to customize Container Probes: Kruise Daemon executes the customized Probe scripts and returns the results to the Pod yaml. + +### API Definition +```yaml +apiVersion: apps.kruise.io/v1alpha1 +kind: PodProbeMarker +metadata: + name: game-server-probe +spec: + selector: + matchLabels: + app: game-server + containers: + - name: gameserver + probes: + - name: Idle + exec: /home/game/idle.sh + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + - name: Healthy + tcpSocket: + port: 8899 + markItems: + # When probe execution is successful + - probeName: Healthy + expectation: succeeded + labels: + gameserver-healthy: 'true' + - probeName: Healthy + expectation: failed + labels: + gameserver-healthy: 'false' + - probeName: Idle + expectation: succeeded + annotations: + controller.kubernetes.io/pod-deletion-cost: '-1' + - probeName: Idle + expectation: failed + annotations: + controller.kubernetes.io/pod-deletion-cost: '1' + +apiVersion: apps.kruise.io/v1alpha1 +kind: PodProbeMarker +metadata: + name: web-server-probe +spec: + selector: + matchLabels: + app: web-server + containers: + - name: web-server + probes: + - name: Healthy + exec: /home/web/healthy.sh + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + markItems: + # When probe execution is successful + - probeName: Healthy + podCondition: + type: healthy + +# for kruise daemon +apiVersion: 
apps.kruise.io/v1alpha1 +kind: NodePodProbe +metadata: + name: node-name +spec: + - podName: gameserver-0 + containers: + - name: gameserver + probes: + - name: Idle + exec: /home/game/idle.sh + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + - name: Healthy + tcpSocket: + port: 8899 + - podName: web-server-xxxxxx + containers: + - name: web-server + probes: + - name: Healthy + exec: /home/web/healthy.sh + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + +status: + - podName: gameserver-0 + containers: + - name: gameserver + probes: + - name: Idle + result: failed + - name: Healthy + result: succeeded + - podName: web-server-xxxxxx + containers: + - name: web-server + probes: + - name: Healthy + result: succeeded + + +# probe result +apiVersion: v1 +kind: Pod +metadata: + name: gameserver-0 + labels: + gameserver-healthy: 'true' + annotations: + controller.kubernetes.io/pod-deletion-cost: '1' +spec: + ... +status: + conditions: + - type: Idle + status: 'False' + - type: Healthy + status: 'True' + +apiVersion: v1 +kind: Pod +metadata: + name: web-server-xxxxxx +spec: + ... +status: + conditions: + - type: Healthy + status: 'True' +``` + +### Implementation +- **pod-probe-controller**: Responsible for automatically generating and cleaning up NodePodProbe resources (one per Node) based on PodProbeMarker +and writing the results back to Pod yaml based on NodePodProbe Status +- **kruise-daemon**: Responsible for executing the probe (EXEC, HTTP) and returning the results to NodePodProbe Status + +### Relationship With Startup Probe +StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. +So Kruise PodProbeMarker probes will not be executed until the StartupProbe succeeds. + +