From 5069fe7e34ba7ff5fd62e99ef83cd2741232d48d Mon Sep 17 00:00:00 2001 From: Karel van IJperen Date: Tue, 19 Sep 2023 19:17:49 +0200 Subject: [PATCH] Remove outdated kill method to stop sidecar Sending the kill signal was needed for Istio < v1.3, and 1.3 was released 4 years ago. If the `ENVOY_ADMIN_API` is set to the default port `15000`, the `ISTIO_QUIT_API` is configured correctly by default. On deployments that restart the containers the shutdown behaviour might be undesirable. The Istio sidecar will be restarted but when the pod has multiple containers that means that meaningful work in the containers that did not crash could be interrupted. So for pods that have multiple containers that need traffic through the service mesh, it is recommended to set `NEVER_KILL_ISTIO` to `true`. --- README.md | 27 ++-------------------- main.go | 29 ++---------------------- scuttle_config.go | 22 ++++++++++++++++-- scuttle_config_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 54 deletions(-) create mode 100644 scuttle_config_test.go diff --git a/README.md b/README.md index a5f431b..890d13c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ will poll indefinitely with backoff, waiting for envoy to report itself as live, All signals are passed to the underlying application. Be warned that `SIGKILL` cannot be passed, so this can leave behind a orphaned process. -When the application exits, unless `NEVER_KILL_ISTIO_ON_FAILURE` has been set and the exit code is non-zero, `scuttle` will instruct envoy to shut down immediately. +When the application exits, unless `NEVER_KILL_ISTIO` has been set to `true`, or `NEVER_KILL_ISTIO_ON_FAILURE` has been set to `true` and the exit code is non-zero, `scuttle` will instruct envoy to shut down immediately. 
## Environment variables @@ -19,34 +19,11 @@ When the application exits, unless `NEVER_KILL_ISTIO_ON_FAILURE` has been set an | `SCUTTLE_LOGGING` | If provided and set to `true`, `scuttle` will log various steps to the console which is helpful for debugging | | `START_WITHOUT_ENVOY` | If provided and set to `true`, `scuttle` will not wait for envoy to be LIVE before starting the main application. However, it will still instruct envoy to exit. | | `WAIT_FOR_ENVOY_TIMEOUT` | If provided and set to a valid `time.Duration` string greater than 0 seconds, `scuttle` will wait for that amount of time before starting the main application. By default, it will wait indefinitely. If `QUIT_WITHOUT_ENVOY_TIMEOUT` is set as well, it will take precedence over this variable | -| `ISTIO_QUIT_API` | If provided `scuttle` will send a POST to `/quitquitquit` at the given API. Should be in format `http://127.0.0.1:15020`. This is intended for Istio v1.3 and higher. When not given, Istio will be stopped using a `pkill` command. | +| `ISTIO_QUIT_API` | This is the path to envoy's pilot agent interface, in the format `http://127.0.0.1:15020`. If not provided and the `ENVOY_ADMIN_API` is configured with the default port `15000`, the setting is configured automatically. If present (configured or deduced), `scuttle` will send a POST to `/quitquitquit` at that URL. | | `GENERIC_QUIT_ENDPOINTS` | If provided `scuttle` will send a POST to the URL given. Multiple URLs are supported and must be provided as a CSV string. Should be in format `http://myendpoint.com` or `http://myendpoint.com,https://myotherendpoint.com`. The status code response is logged (if logging is enabled) but is not used. A 200 is treated the same as a 404 or 500. `GENERIC_QUIT_ENDPOINTS` is handled before Istio is stopped. | | `QUIT_REQUEST_TIMEOUT` | A deadline provided as a valid `time.Duration` string for requests to the `/quitquitquit` and/or the generic endpoints. 
If the deadline is exceeded `scuttle` gives up and exits cleanly. The default value is `5s`. | | `QUIT_WITHOUT_ENVOY_TIMEOUT` | If provided and set to a valid duration, `scuttle` will exit if Envoy does not become available before the end of the timeout and not continue with the passed in executable. If `START_WITHOUT_ENVOY` is also set, this variable will not be taken into account. Also, if `WAIT_FOR_ENVOY_TIMEOUT` is set, this variable will take precedence. | -## How Scuttle stops Istio - -Scuttle has two methods to stop Istio. You should configure Scuttle appropriately based on the version of Istio you are using. - -| Istio Version | Method | -|---------------|--------| -| 1.3 and higher| `/quitquitquit` endpoint | -| 1.2 and lower | `pkill` command | - -### Istio 1.3 and higher - -Version 1.3 of Istio introduced an endpoint `/quitquitquit` similar to Envoy. By default this endpoint is available at `http://127.0.0.1:15020` which is the Pilot Agent service, responsible for managing envoy. ([Source](https://github.com/istio/istio/issues/15041)) - -To enable this, set the environment variable `ISTIO_QUIT_API` to `http://127.0.0.1:15020`. - -### Istio 1.2 and lower - -Versions 1.2 and lower of Istio have no supported method to stop Istio Sidecars. As a workaround Scuttle stops Istio using the command `pkill -SIGINT pilot-agent`. - -To enable this, you must add `shareProcessNamespace: true` to your **Pod** definition in Kubernetes. This allows Scuttle to stop the service running on the sidecar container. 
- -*Note:* This method is used by default if `ISTIO_QUIT_API` is not set - ## Example usage in your Job's `Dockerfile` ```dockerfile diff --git a/main.go b/main.go index 0e8fa3c..251a008 100644 --- a/main.go +++ b/main.go @@ -127,11 +127,6 @@ func kill(exitCode int) { case config.NeverKillIstioOnFailure && exitCode != 0: log(fmt.Sprintf(logLineUnformatted, "Skipping Istio kill", "NEVER_KILL_ISTIO_ON_FAILURE is true", exitCode)) os.Exit(exitCode) - case config.IstioQuitAPI == "": - // No istio API sent, fallback to Pkill method - log(fmt.Sprintf(logLineUnformatted, "Stopping Istio with pkill", "ISTIO_QUIT_API is not set", exitCode)) - killGenericEndpoints() - killIstioWithPkill() default: // Stop istio using api log(fmt.Sprintf(logLineUnformatted, "Stopping Istio with API", "ISTIO_QUIT_API is set", exitCode)) @@ -167,35 +162,15 @@ func killGenericEndpoints() { func killIstioWithAPI() { log(fmt.Sprintf("Stopping Istio using Istio API '%s' (intended for Istio >v1.2)", config.IstioQuitAPI)) - responseSuccess := false ctx, cancel := context.WithTimeout(context.Background(), config.QuitRequestTimeout) defer cancel() url := fmt.Sprintf("%s/quitquitquit", config.IstioQuitAPI) code, err := postKill(ctx, url) if err != nil { log(fmt.Sprintf("Sent quitquitquit to Istio, error: %d", err)) - } else { - log(fmt.Sprintf("Sent quitquitquit to Istio, status code: %d", code)) - responseSuccess = code >= 200 && code < 300 - } - - if !responseSuccess && config.IstioFallbackPkill { - log(fmt.Sprintf("quitquitquit failed, will attempt pkill method")) - killIstioWithPkill() - } -} - -func killIstioWithPkill() { - log("Stopping Istio using pkill command (intended for Istio