diff --git a/Dockerfile b/Dockerfile
index 5fc35064..90f57a26 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ RUN set -x \
     && export GNUPGHOME="$(mktemp -d)" \
     && gpg --keyserver ha.pool.sks-keyservers.net --recv-keys "$TINI_GPG_KEY" \
     && gpg --batch --verify tini.asc tini \
-    && rm -r "$GNUPGHOME" tini.asc \
+    && rm -rf "$GNUPGHOME" tini.asc \
     && mv tini /usr/bin/tini \
     && chmod +x /usr/bin/tini \
     && tini -- true \
diff --git a/README.md b/README.md
index 4e0eadb3..52ab9d44 100644
--- a/README.md
+++ b/README.md
@@ -278,7 +278,6 @@ The default value when not specified is `redispatch,http-server-close,dontlognul
 < HTTP/1.1 200 OK
 ```
  * Some of the features of marathon-lb assume that it is the only instance of itself running in a PID namespace. i.e. marathon-lb assumes that it is running in a container. Certain features like the `/_mlb_signal` endpoints and the `/_haproxy_getpids` endpoint (and by extension, zero-downtime deployments) may behave unexpectedly if more than one instance of marathon-lb is running in the same PID namespace or if there are other HAProxy processes in the same PID namespace.
- * You may want to set the `HAPROXY_RELOAD_SIGTERM_DELAY` environment variable to a value such as `5m`. This value is passed directly to the `sleep` command, which is executed after every HAProxy reload before sending a SIGTERM to the old HAProxy PIDs (see [service/haproxy/run](service/haproxy/run)). For cases where you expect long-lived TCP connections, you may _not_ want to terminate HAProxy before all connections finish. See [this discussion](http://www.serverphorums.com/read.php?10,862139) for more on HAProxy reloads, and issues [#5](https://github.com/mesosphere/marathon-lb/issues/5), [#71](https://github.com/mesosphere/marathon-lb/issues/71), [#267](https://github.com/mesosphere/marathon-lb/issues/267), [#276](https://github.com/mesosphere/marathon-lb/issues/276), and [#318](https://github.com/mesosphere/marathon-lb/issues/318) for more. If you are reloading so frequently that PIDs are being reused within the delay you specify, this may result in SIGTERMs being sent to the wrong PIDs.
 
 ## Zero-downtime Deployments
 
diff --git a/haproxy_wrapper.py b/haproxy_wrapper.py
new file mode 100755
index 00000000..e80b5045
--- /dev/null
+++ b/haproxy_wrapper.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Start a program and block until the pipe shared with it is released.
+
+Usage: haproxy_wrapper.py PROGRAM [ARGS...]
+
+A pipe is created and PROGRAM is exec'd in a forked child that keeps the
+write end open; the descriptor number is exported to it through the
+HAPROXY_WRAPPER_FD environment variable. The parent closes its own copy of
+the write end and exits (always with status 0) once reading the pipe
+reports end-of-file or fails, i.e. once no process holds the write end.
+"""
+import os
+import sys
+import time
+import errno
+
+
+def create_haproxy_pipe():
+    """Return a (read_fd, write_fd) pipe whose write end survives exec."""
+    pipefd = os.pipe()
+    # Since Python 3.4 (PEP 446) os.pipe() returns non-inheritable
+    # descriptors, which are closed when the child calls exec. The write end
+    # must stay open in the exec'd program, otherwise the parent sees EOF
+    # immediately and HAPROXY_WRAPPER_FD names a closed descriptor.
+    os.set_inheritable(pipefd[1], True)
+    return pipefd
+
+
+def close_and_swallow(fd):
+    """Close fd, ignoring any OSError raised by the close."""
+    try:
+        os.close(fd)
+    except OSError:
+        # Best-effort cleanup: the descriptor may have been closed earlier.
+        pass
+
+
+def wait_on_haproxy_pipe(pipefd):
+    """Read once from the pipe and report whether to keep waiting.
+
+    Returns True if a byte was read or the read was interrupted (EINTR).
+    Returns False, after closing both descriptors, on end-of-file or on any
+    other OSError.
+    """
+    try:
+        ret = os.read(pipefd[0], 1)
+    except OSError as e:
+        if e.errno == errno.EINTR:
+            return True
+        close_and_swallow(pipefd[0])
+        close_and_swallow(pipefd[1])
+        return False
+    if not ret:
+        # EOF: every copy of the write end has been closed. Stop here
+        # instead of relying on the next read failing on a closed fd.
+        close_and_swallow(pipefd[0])
+        close_and_swallow(pipefd[1])
+        return False
+    return True
+
+
+pipefd = create_haproxy_pipe()
+
+pid = os.fork()
+
+if not pid:
+    # Child: tell the program which descriptor is the pipe's write end.
+    os.environ["HAPROXY_WRAPPER_FD"] = str(pipefd[1])
+    # Close the read side
+    os.close(pipefd[0])
+    os.execv(sys.argv[1], sys.argv[1:])
+
+# Close the write side
+os.close(pipefd[1])
+while wait_on_haproxy_pipe(pipefd):
+    time.sleep(0.005)
+sys.exit(0)
diff --git a/run b/run
index 42dd7412..63ade17f 100755
--- a/run
+++ b/run
@@ -16,11 +16,6 @@ else
   exit 1
 fi
 
-if [ -n "${HAPROXY_RELOAD_SIGTERM_DELAY-}" ]; then
-  echo $HAPROXY_RELOAD_SIGTERM_DELAY > $HAPROXY_SERVICE/env/HAPROXY_RELOAD_SIGTERM_DELAY
-fi
-
-
 # Find the --ssl-certs arg if one was provided,
 # get the certs and remove them and the arg from the list
 # of positional parameters so we don't duplicate them
diff --git a/service/haproxy/run b/service/haproxy/run
index aca2290a..a7d1a078 100755
--- a/service/haproxy/run
+++ b/service/haproxy/run
@@ -33,9 +33,10 @@ reload() {
 
   # Trigger reload
   LATEST_HAPROXY_PID=$(cat $PIDFILE)
-  haproxy -p $PIDFILE -f /marathon-lb/haproxy.cfg -D -sf $LATEST_HAPROXY_PID 200>&-
-  if [ -n "${HAPROXY_RELOAD_SIGTERM_DELAY-}" ]; then
-    sleep $HAPROXY_RELOAD_SIGTERM_DELAY && kill $LATEST_HAPROXY_PID 200>&- 2>/dev/null &
+  /marathon-lb/haproxy_wrapper.py `which haproxy` -p $PIDFILE -f /marathon-lb/haproxy.cfg -sf $LATEST_HAPROXY_PID 200>&-
+  local exit_code=$?
+  if [ $exit_code -ne 0 ]; then
+    echo "HAProxy reload failed" 1>&2
   fi
 
   # Remove the firewall rules