Skip to content
This repository has been archived by the owner on Dec 4, 2024. It is now read-only.

Commit

Permalink
Add container syslogd support and fix 'Waiting for Pids' race conditi…
Browse files Browse the repository at this point in the history
…on (#505)

* Add additional debug logs in reload
* Adding syslog support
* Make syslogd and retry reload configurable
* Update documentation
* Make infinite retries optional, run syslogd with runsvdir
  • Loading branch information
drewkerrigan authored Oct 9, 2017
1 parent 6418441 commit 27df025
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 19 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ FROM debian:stretch
# runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
inetutils-syslogd \
iptables \
libcurl3 \
liblua5.3-0 \
Expand Down
13 changes: 11 additions & 2 deletions Longhelp.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ Marathon-lb just needs to know where to find Marathon.
```
usage: marathon_lb.py [-h] [--longhelp] [--marathon MARATHON [MARATHON ...]]
[--haproxy-config HAPROXY_CONFIG] [--group GROUP]
[--command COMMAND] [--strict-mode] [--sse]
[--health-check]
[--command COMMAND]
[--max-reload-retries MAX_RELOAD_RETRIES]
[--reload-interval RELOAD_INTERVAL] [--strict-mode]
[--sse] [--health-check]
[--lru-cache-capacity LRU_CACHE_CAPACITY]
[--haproxy-map] [--dont-bind-http-https]
[--ssl-certs SSL_CERTS] [--skip-validation] [--dry]
Expand Down Expand Up @@ -51,6 +53,13 @@ optional arguments:
--command COMMAND, -c COMMAND
If set, run this command to reload haproxy. (default:
None)
--max-reload-retries MAX_RELOAD_RETRIES
Max reload retries before failure. Reloads happen
every --reload-interval seconds. Set to 0 to disable
or -1 for infinite retries. (default: 10)
--reload-interval RELOAD_INTERVAL
Wait this number of seconds betwee nreload retries.
(default: 10)
--strict-mode If set, backends are only advertised if
HAPROXY_{n}_ENABLED=true. Strict mode will be enabled
by default in a future release. (default: False)
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,16 @@ The default value when not specified is `redispatch,http-server-close,dontlognul
< HTTP/1.1 200 OK
```
* Some of the features of marathon-lb assume that it is the only instance of itself running in a PID namespace. i.e. marathon-lb assumes that it is running in a container. Certain features like the `/_mlb_signal` endpoints and the `/_haproxy_getpids` endpoint (and by extension, zero-downtime deployments) may behave unexpectedly if more than one instance of marathon-lb is running in the same PID namespace or if there are other HAProxy processes in the same PID namespace.
* Sometimes it is desirable to get detailed container and HAProxy logging for easier debugging as well as viewing connection logging to frontends and backends. This can be achieved by setting the `HAPROXY_SYSLOGD` environment variable or `container-syslogd` value in `options.json` like so:

```json
{
"marathon-lb": {
"container-syslogd": true
}
}
```


## Zero-downtime Deployments

Expand Down
20 changes: 19 additions & 1 deletion haproxy_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,53 @@
import sys
import time
import errno
import logging

logger = logging.getLogger('haproxy_wrapper')
logger.setLevel(getattr(logging, 'DEBUG'))
formatter = logging.Formatter("%(asctime)-15s %(name)s: %(message)s")
consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(formatter)
logger.addHandler(consoleHandler)


def create_haproxy_pipe():
logger.debug("create_haproxy_pipe called")
pipefd = os.pipe()
os.set_inheritable(pipefd[0], True)
os.set_inheritable(pipefd[1], True)
logger.debug("create_haproxy_pipe done")
return pipefd


def close_and_swallow(fd):
logger.debug("close_and_swallow called")
try:
os.close(fd)
except OSError:
logger.debug("close_and_swallow successful")
except OSError as e:
# swallow
logger.debug("close_and_swallow swallow OSError: %s", e)
pass


def wait_on_haproxy_pipe(pipefd):
logger.debug("wait_on_haproxy_pipe called")
try:
ret = os.read(pipefd[0], 1)
if len(ret) == 0:
close_and_swallow(pipefd[0])
close_and_swallow(pipefd[1])
logger.debug("wait_on_haproxy_pipe done (False)")
return False
except OSError as e:
logger.debug("wait_on_haproxy_pipe OSError: %s", e)
if e.args[0] != errno.EINTR:
close_and_swallow(pipefd[0])
close_and_swallow(pipefd[1])
logger.debug("wait_on_haproxy_pipe done (False)")
return False
logger.debug("wait_on_haproxy_pipe done (True)")
return True


Expand Down
52 changes: 47 additions & 5 deletions marathon_lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,8 @@ def get_haproxy_pids():
"pidof haproxy",
stderr=subprocess.STDOUT,
shell=True).split()))
except subprocess.CalledProcessError:
except subprocess.CalledProcessError as ex:
logger.debug("Unable to get haproxy pids: %s", ex)
return set()


Expand Down Expand Up @@ -646,14 +647,46 @@ def reloadConfig():
logger.info("reloading using %s", " ".join(reloadCommand))
try:
start_time = time.time()
checkpoint_time = start_time
# Retry or log the reload every 10 seconds
reload_frequency = args.reload_interval
reload_retries = args.max_reload_retries
enable_retries = True
infinite_retries = False
if reload_retries == 0:
enable_retries = False
elif reload_retries < 0:
infinite_retries = True
old_pids = get_haproxy_pids()
subprocess.check_call(reloadCommand, close_fds=True)
new_pids = get_haproxy_pids()
logger.debug("Waiting for new haproxy pid (old pids: [%s], " +
"new_pids: [%s])...", old_pids, new_pids)
# Wait until the reload actually occurs and there's a new PID
while len(get_haproxy_pids() - old_pids) < 1:
logger.debug("Waiting for new haproxy pid...")
while True:
if len(new_pids - old_pids) >= 1:
logger.debug("new pids: [%s]", new_pids)
logger.debug("reload finished, took %s seconds",
time.time() - start_time)
break
timeSinceCheckpoint = time.time() - checkpoint_time
if (timeSinceCheckpoint >= reload_frequency):
logger.debug("Still waiting for new haproxy pid after " +
"%s seconds (old pids: [%s], " +
"new_pids: [%s]).",
time.time() - start_time, old_pids, new_pids)
checkpoint_time = time.time()
if enable_retries:
if not infinite_retries:
reload_retries -= 1
if reload_retries == 0:
logger.debug("reload failed after %s seconds",
time.time() - start_time)
break
logger.debug("Attempting reload again...")
subprocess.check_call(reloadCommand, close_fds=True)
time.sleep(0.1)
logger.debug("reload finished, took %s seconds",
time.time() - start_time)
new_pids = get_haproxy_pids()
except OSError as ex:
logger.error("unable to reload config using command %s",
" ".join(reloadCommand))
Expand Down Expand Up @@ -1602,6 +1635,15 @@ def get_arg_parser():
parser.add_argument("--command", "-c",
help="If set, run this command to reload haproxy.",
default=None)
parser.add_argument("--max-reload-retries",
help="Max reload retries before failure. Reloads"
" happen every --reload-interval seconds. Set to"
" 0 to disable or -1 for infinite retries.",
type=int, default=10)
parser.add_argument("--reload-interval",
help="Wait this number of seconds between"
" reload retries.",
type=int, default=10)
parser.add_argument("--strict-mode",
help="If set, backends are only advertised if"
" HAPROXY_{n}_ENABLED=true. Strict mode will be"
Expand Down
32 changes: 27 additions & 5 deletions run
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
#!/bin/bash
set -euo pipefail

LOG_PREFIX="$(pwd) $0"
log() {
logline="[$LOG_PREFIX] $1\n"
printf "$logline" >&1
}
log_error() {
logline="[$LOG_PREFIX] $1\n"
printf "$logline" >&1
printf "$logline" >&2
}

if [ -n "${HAPROXY_SYSLOGD-}" ]; then
SYSLOGD_SERVICE="/marathon-lb/service/syslogd"
mkdir -p $SYSLOGD_SERVICE
cp /marathon-lb/syslogd/run "$SYSLOGD_SERVICE/"
fi

# Custom syslog socket for marathon-lb.py logging
SYSLOG_SOCKET=${SYSLOG_SOCKET:-/dev/null}

LB_SERVICE="/marathon-lb/service/lb"
Expand All @@ -10,9 +28,9 @@ HAPROXY_SERVICE="/marathon-lb/service/haproxy"
mkdir -p $HAPROXY_SERVICE/env

if [ -n "${PORTS-}" ]; then
echo $PORTS > $HAPROXY_SERVICE/env/PORTS
log "$PORTS > $HAPROXY_SERVICE/env/PORTS"
else
echo 'Define $PORTS with a comma-separated list of ports to which HAProxy binds' >&2
log_error "Define $PORTS with a comma-separated list of ports to which HAProxy binds"
exit 1
fi

Expand Down Expand Up @@ -64,7 +82,7 @@ if [ -n "${MESOS_SANDBOX-}" ] && [ -d "$MESOS_SANDBOX/templates" ]; then
fi

if [ -n "${HAPROXY_SYSCTL_PARAMS-}" ]; then
echo "setting sysctl params to: ${HAPROXY_SYSCTL_PARAMS}"
log "setting sysctl params to: ${HAPROXY_SYSCTL_PARAMS}"
if [ -n "${HAPROXY_SYSCTL_NONSTRICT-}" ]; then
# ignore errors
sysctl -w $HAPROXY_SYSCTL_PARAMS || true
Expand All @@ -83,12 +101,12 @@ case "$MODE" in
ARGS="--sse"
;;
*)
echo "Unknown mode $MODE. Synopsis: $0 poll|sse [marathon_lb.py args]" >&2
log_error "Unknown mode $MODE. Synopsis: $0 poll|sse [marathon_lb.py args]"
exit 1
;;
esac

if [ -n "${VAULT_TOKEN-}" ] && [ -n "${VAULT_HOST-}" ] && [ -n "${VAULT_PORT-}" ] && [ -n "${VAULT_PATH-}" ]; then
if [ -n "${VAULT_TOKEN-}" ] && [ -n "${VAULT_HOST-}" ] && [ -n "${VAULT_PORT-}" ] && [ -n "${VAULT_PATH-}" ]; then
MARATHON_LB_PASSWORD=$(curl -k -L -H "X-Vault-Token:$VAULT_TOKEN" "$VAULT_URL" -s| python -m json.tool | python -c 'import json,sys;obj=json.load(sys.stdin);print obj["data"]["pass"]')
MARATHON_LB_USER=$(curl -k -L -H "X-Vault-Token:$VAULT_TOKEN" "$VAULT_URL" -s | python -m json.tool | python -c 'import json,sys;obj=json.load(sys.stdin);print obj["data"]["user"]')
CREDENTIALS="$MARATHON_LB_USER:$MARATHON_LB_PASSWORD"
Expand All @@ -113,6 +131,10 @@ exec /marathon-lb/marathon_lb.py \
EOF
chmod 755 $LB_SERVICE/run

log "Created $LB_SERVICE/run with contents:"
LB_RUN=$(cat $LB_SERVICE/run)
log "$LB_RUN"

if [ "${MODE}" == "poll" ]; then

cat > $LB_SERVICE/finish << EOF
Expand Down
34 changes: 28 additions & 6 deletions service/haproxy/run
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@
exec 2>&1
export PIDFILE="/tmp/haproxy.pid"

LOG_PREFIX="$(pwd) $0"
log() {
logline="[$LOG_PREFIX] $1\n"
printf "$logline" >&1
}
log_error() {
logline="[$LOG_PREFIX] $1\n"
printf "$logline" >&1
printf "$logline" >&2
}

addFirewallRules() {
IFS=',' read -ra ADDR <<< "$PORTS"
for i in "${ADDR[@]}"; do
Expand All @@ -17,33 +28,44 @@ removeFirewallRules() {
}

reload() {
echo "Reloading haproxy"
log "Reloading haproxy"

(
flock 200

# Begin to drop SYN packets with firewall rules
log "Dropping SYN packets with addFirewallRules"
addFirewallRules

# Wait to settle
sleep 0.1
log "addFirewallRules done"

# Save the current HAProxy state
log "Saving the current HAProxy state"
socat /var/run/haproxy/socket - <<< "show servers state" > /var/state/haproxy/global
log "Done saving the current HAProxy state"

# Trigger reload
LATEST_HAPROXY_PID=$(cat $PIDFILE)
/marathon-lb/haproxy_wrapper.py `which haproxy` -D -p $PIDFILE -f /marathon-lb/haproxy.cfg -sf $LATEST_HAPROXY_PID 200>&-
log "LATEST_HAPROXY_PID: [$LATEST_HAPROXY_PID]"

WHICH_HAPROXY=$(which haproxy)

log "/marathon-lb/haproxy_wrapper.py $WHICH_HAPROXY -D -p $PIDFILE -f /marathon-lb/haproxy.cfg -sf $LATEST_HAPROXY_PID 200>&-"
/marathon-lb/haproxy_wrapper.py $WHICH_HAPROXY -D -p $PIDFILE -f /marathon-lb/haproxy.cfg -sf $LATEST_HAPROXY_PID 200>&-
local exit_code=$?
log "exit code: $exit_code"
if [ $exit_code -ne 0 ]; then
echo "HAProxy reload failed" 1>&2
log_error "HAProxy reload failed"
fi

# Remove the firewall rules
log "Removing firewall rules with removeFirewallRules"
removeFirewallRules
log "removeFirewallRules done"

# Need to wait 1s to prevent TCP SYN exponential backoff
sleep 1

log "Reload finished"
) 200>/var/run/haproxy/lock
}

Expand Down
9 changes: 9 additions & 0 deletions syslogd/run
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
exec 2>&1

# A hack to redirect syslog to stdout.
# This is necessary because haproxy only logs to syslog.
ln -sf /proc/$$/fd/1 /var/log/container_stdout

# Start syslog in foreground
/usr/sbin/syslogd -f "/marathon-lb/syslogd/syslog.conf" -n
1 change: 1 addition & 0 deletions syslogd/syslog.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.*;auth,authpriv.none -/var/log/container_stdout

0 comments on commit 27df025

Please sign in to comment.