diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index b907f6af9b45..f0d22411d6f9 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -4,6 +4,8 @@ DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include +EXTRA_DIST = $(top_srcdir)/cmd/zed/zed.d/README + sbin_PROGRAMS = zed zed_SOURCES = \ @@ -33,6 +35,7 @@ zed_LDADD = \ zedconfdir = $(sysconfdir)/zfs/zed.d dist_zedconf_DATA = \ + $(top_srcdir)/cmd/zed/zed.d/zed-functions.sh \ $(top_srcdir)/cmd/zed/zed.d/zed.rc zedexecdir = $(libexecdir)/zfs/zed.d @@ -40,29 +43,30 @@ zedexecdir = $(libexecdir)/zfs/zed.d dist_zedexec_SCRIPTS = \ $(top_srcdir)/cmd/zed/zed.d/all-debug.sh \ $(top_srcdir)/cmd/zed/zed.d/all-syslog.sh \ - $(top_srcdir)/cmd/zed/zed.d/checksum-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/checksum-notify.sh \ $(top_srcdir)/cmd/zed/zed.d/checksum-spare.sh \ - $(top_srcdir)/cmd/zed/zed.d/data-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/generic-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/io-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/data-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/generic-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/io-notify.sh \ $(top_srcdir)/cmd/zed/zed.d/io-spare.sh \ - $(top_srcdir)/cmd/zed/zed.d/resilver.finish-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/scrub.finish-email.sh + $(top_srcdir)/cmd/zed/zed.d/resilver.finish-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/scrub.finish-notify.sh zedconfdefaults = \ all-syslog.sh \ - checksum-email.sh \ + checksum-notify.sh \ checksum-spare.sh \ - data-email.sh \ - io-email.sh \ + data-notify.sh \ + io-notify.sh \ io-spare.sh \ - resilver.finish-email.sh \ - scrub.finish-email.sh + resilver.finish-notify.sh \ + scrub.finish-notify.sh -install-data-local: +install-data-hook: $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" for f in $(zedconfdefaults); do \ test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ done + chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/zed.d/README b/cmd/zed/zed.d/README new file mode 100644 index 000000000000..b4cb11514364 --- /dev/null +++ b/cmd/zed/zed.d/README @@ -0,0 +1,30 @@ +Shell scripts are the recommended choice for ZEDLETs that mostly call +other utilities and do relatively little data manipulation. + +Shell scripts MUST work on both bash and dash. + +Shell scripts MUST run cleanly through ShellCheck: + http://www.shellcheck.net/ + +General functions reside in "zed-functions.sh". Use them where applicable. + +Additional references that may be of use: + + Google Shell Style Guide + https://google-styleguide.googlecode.com/svn/trunk/shell.xml + + Dash as /bin/sh + https://wiki.ubuntu.com/DashAsBinSh + + Common shell script mistakes + http://www.pixelbeat.org/programming/shell_script_mistakes.html + + Filenames and Pathnames in Shell: How to do it Correctly + http://www.dwheeler.com/essays/filenames-in-shell.html + + Autoconf: Portable Shell Programming + https://www.gnu.org/software/autoconf/manual/autoconf.html#Portable-Shell + +Please BE CONSISTENT with the existing style, check for errors, +minimize dependencies where possible, try to be portable, +and comment anything non-obvious. Festina lente. diff --git a/cmd/zed/zed.d/all-debug.sh b/cmd/zed/zed.d/all-debug.sh index aa20ef268663..057e39b504b5 100755 --- a/cmd/zed/zed.d/all-debug.sh +++ b/cmd/zed/zed.d/all-debug.sh @@ -2,16 +2,23 @@ # # Log all environment variables to ZED_DEBUG_LOG. # -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# This can be a useful aid when developing/debugging ZEDLETs since it shows the +# environment variables defined for each zevent. -# Override the default umask to restrict access to a newly-created logfile. -umask 077 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +: "${ZED_DEBUG_LOG:="${TMPDIR:="/tmp"}/zed.debug.log"}" -# Append stdout to the logfile after obtaining an advisory lock. -exec >> "${ZED_DEBUG_LOG:=/tmp/zed.debug.log}" -flock -x 1 +lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock" + +umask 077 +zed_lock "${lockfile}" +exec >> "${ZED_DEBUG_LOG}" printenv | sort echo +exec >&- +zed_unlock "${lockfile}" exit 0 diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh index acf9e83bde3a..b34d17cef15c 100755 --- a/cmd/zed/zed.d/all-syslog.sh +++ b/cmd/zed/zed.d/all-syslog.sh @@ -1,11 +1,10 @@ #!/bin/sh # # Log the zevent via syslog. -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" -logger -t "${ZED_SYSLOG_TAG:=zed}" -p "${ZED_SYSLOG_PRIORITY:=daemon.notice}" \ - eid="${ZEVENT_EID}" class="${ZEVENT_SUBCLASS}" \ - "${ZEVENT_POOL:+pool=$ZEVENT_POOL}" +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" +zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \ + "${ZEVENT_POOL:+"pool=${ZEVENT_POOL}"}" exit 0 diff --git a/cmd/zed/zed.d/checksum-email.sh b/cmd/zed/zed.d/checksum-email.sh deleted file mode 120000 index f95bec21532a..000000000000 --- a/cmd/zed/zed.d/checksum-email.sh +++ /dev/null @@ -1 +0,0 @@ -io-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/checksum-notify.sh b/cmd/zed/zed.d/checksum-notify.sh new file mode 120000 index 000000000000..9008738073c9 --- /dev/null +++ b/cmd/zed/zed.d/checksum-notify.sh @@ -0,0 +1 @@ +io-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/data-email.sh b/cmd/zed/zed.d/data-email.sh deleted file mode 100755 index 543b8fe55c02..000000000000 --- a/cmd/zed/zed.d/data-email.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a DATA zevent. -# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool. -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# State File Format: -# POOL;TIME_OF_LAST_EMAIL -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 - -if test "${ZEVENT_SUBCLASS}" != "data"; then \ - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi - -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 - -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi - -NAME="zed.${ZEVENT_SUBCLASS}.email" -LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" -STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" - -# Obtain lock to ensure mutual exclusion for accessing state. -exec 8> "${LOCKFILE}" -flock -x 8 - -# Query state for last time email was sent for this pool. -TIME_NOW=`date +%s` -TIME_LAST=`egrep "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null | cut -d ";" -f2` -if test -n "${TIME_LAST}"; then - TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` - if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then - exit 2 - fi -fi - -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" </dev/null > "${STATEFILE}.$$" -echo "${ZEVENT_POOL};${TIME_NOW}" >> "${STATEFILE}.$$" -mv -f "${STATEFILE}.$$" "${STATEFILE}" - -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 -fi - -exit 0 diff --git a/cmd/zed/zed.d/data-notify.sh b/cmd/zed/zed.d/data-notify.sh new file mode 120000 index 000000000000..9008738073c9 --- /dev/null +++ b/cmd/zed/zed.d/data-notify.sh @@ -0,0 +1 @@ +io-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/generic-email.sh b/cmd/zed/zed.d/generic-email.sh deleted file mode 100755 index 357aedee5fa7..000000000000 --- a/cmd/zed/zed.d/generic-email.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a given zevent. -# This is a generic script than can be symlinked to a file in the zed -# enabled-scripts directory in order to have email sent when a particular -# class of zevents occurs. The symlink filename must begin with the zevent -# (sub)class string (eg, "probe_failure-email.sh" for the "probe_failure" -# subclass). Refer to the zed(8) manpage for details. -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email suppressed -# 3: missing executable -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 - -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi - -# Override the default umask to restrict access to the msgbody tmpfile. -umask 077 - -SUBJECT="ZFS ${ZEVENT_SUBCLASS} event" -test -n "${ZEVENT_POOL}" && SUBJECT="${SUBJECT} for ${ZEVENT_POOL}" -SUBJECT="${SUBJECT} on `hostname`" - -MSGBODY="${TMPDIR:=/tmp}/`basename \"$0\"`.$$" -{ - echo "A ZFS ${ZEVENT_SUBCLASS} event has been posted:" - echo - echo " eid: ${ZEVENT_EID}" - echo " host: `hostname`" - echo " time: ${ZEVENT_TIME_STRING}" - test -n "${ZEVENT_VDEV_TYPE}" -a -n "${ZEVENT_VDEV_PATH}" && \ - echo " vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}" - test -n "${ZEVENT_POOL}" -a -x "${ZPOOL}" && \ - "${ZPOOL}" status "${ZEVENT_POOL}" -} > "${MSGBODY}" - -test -f "${MSGBODY}" && "${MAIL}" -s "${SUBJECT}" "${ZED_EMAIL}" < "${MSGBODY}" -MAIL_STATUS=$? -rm -f "${MSGBODY}" - -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 -fi - -exit 0 diff --git a/cmd/zed/zed.d/generic-notify.sh b/cmd/zed/zed.d/generic-notify.sh new file mode 100755 index 000000000000..e438031a088a --- /dev/null +++ b/cmd/zed/zed.d/generic-notify.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# +# Send notification in response to a given zevent. +# +# This is a generic script than can be symlinked to a file in the +# enabled-zedlets directory to have a notification sent when a particular +# class of zevents occurs. The symlink filename must begin with the zevent +# (sub)class string (e.g., "probe_failure-notify.sh" for the "probe_failure" +# subclass). Refer to the zed(8) manpage for details. +# +# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given +# class/pool combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool. +# +# Exit codes: +# 0: notification sent +# 1: notification failed +# 2: notification not configured +# 3: notification suppressed + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +# Rate-limit the notification based in part on the filename. +# +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")" +rate_limit_interval="${ZED_NOTIFY_INTERVAL_SECS}" +zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3 + +umask 077 +pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}" +host_str=" on $(hostname)" +note_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}" +note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + echo "ZFS has posted the following event:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + + [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" + [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" + [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" + + [ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \ + && "${ZPOOL}" status "${ZEVENT_POOL}" + +} > "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/io-email.sh b/cmd/zed/zed.d/io-email.sh deleted file mode 100755 index 9edbe6670d94..000000000000 --- a/cmd/zed/zed.d/io-email.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a CHECKSUM or IO zevent. -# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool/vdev combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool/device. -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# State File Format: -# POOL;VDEV_PATH;TIME_OF_LAST_EMAIL -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 -test -n "${ZEVENT_VDEV_PATH}" || exit 5 - -if test "${ZEVENT_SUBCLASS}" != "checksum" \ - -a "${ZEVENT_SUBCLASS}" != "io"; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi - -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 - -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi - -NAME="zed.${ZEVENT_SUBCLASS}.email" -LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" -STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" - -# Obtain lock to ensure mutual exclusion for accessing state. -exec 8> "${LOCKFILE}" -flock -x 8 - -# Query state for last time email was sent for this pool/vdev. -TIME_NOW=`date +%s` -TIME_LAST=`egrep "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \ - 2>/dev/null | cut -d ";" -f3` -if test -n "${TIME_LAST}"; then - TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` - if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then - exit 2 - fi -fi - -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" </dev/null > "${STATEFILE}.$$" -echo "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${TIME_NOW}" >> "${STATEFILE}.$$" -mv -f "${STATEFILE}.$$" "${STATEFILE}" - -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 -fi - -exit 0 diff --git a/cmd/zed/zed.d/io-notify.sh b/cmd/zed/zed.d/io-notify.sh new file mode 100755 index 000000000000..3ce918ad70a7 --- /dev/null +++ b/cmd/zed/zed.d/io-notify.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# +# Send notification in response to a CHECKSUM, DATA, or IO error. +# +# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given +# class/pool/[vdev] combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool/[vdev]. +# +# Exit codes: +# 0: notification sent +# 1: notification failed +# 2: notification not configured +# 3: notification suppressed +# 9: internal error + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +[ -n "${ZEVENT_POOL}" ] || exit 9 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 + +if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \ + && [ "${ZEVENT_SUBCLASS}" != "data" ] \ + && [ "${ZEVENT_SUBCLASS}" != "io" ]; then + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 +fi + +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify" +zed_rate_limit "${rate_limit_tag}" || exit 3 + +umask 077 +note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" +note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + [ "${ZEVENT_SUBCLASS}" = "io" ] && article="an" || article="a" + + echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + + [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" + [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" + [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" + + [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ] \ + && echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}" + + [ -n "${ZEVENT_VDEV_READ_ERRORS}" ] \ + && echo " read: ${ZEVENT_VDEV_READ_ERRORS}" + + [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ] \ + && echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}" + + echo " pool: ${ZEVENT_POOL}" + +} > "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/io-spare.sh b/cmd/zed/zed.d/io-spare.sh index b64b2a9f1160..1835cb4a3d22 100755 --- a/cmd/zed/zed.d/io-spare.sh +++ b/cmd/zed/zed.d/io-spare.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Replace a device with a hot spare in response to IO or checksum errors. +# Replace a device with a hot spare in response to IO or CHECKSUM errors. # The following actions will be performed automatically when the number # of errors exceed the limit set by ZED_SPARE_ON_IO_ERRORS or # ZED_SPARE_ON_CHECKSUM_ERRORS. @@ -21,106 +21,219 @@ # the majority of the expected hot spare functionality. # # Exit codes: -# 0: replaced by hot spare -# 1: no hot spare device available -# 2: hot sparing disabled -# 3: already faulted or degraded -# 4: unsupported event class -# 5: internal error -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 -test -n "${ZEVENT_VDEV_PATH}" || exit 5 -test -n "${ZEVENT_VDEV_GUID}" || exit 5 - -# Defaults to disabled, enable in the zed.rc file. -ZED_SPARE_ON_IO_ERRORS=${ZED_SPARE_ON_IO_ERRORS:-0} -ZED_SPARE_ON_CHECKSUM_ERRORS=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0} - -if [ ${ZED_SPARE_ON_IO_ERRORS} -eq 0 -a \ - ${ZED_SPARE_ON_CHECKSUM_ERRORS} -eq 0 ]; then - exit 2 -fi - -# A lock file is used to serialize execution. -ZED_LOCKDIR=${ZED_LOCKDIR:-/var/lock} -LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock" - -exec 8> "${LOCKFILE}" -flock -x 8 - -# Given a and return the status, (ONLINE, FAULTED, etc...). -vdev_status() { - local POOL=$1 - local VDEV=`basename $2` - local T=' ' # tab character since '\t' isn't portable - - ${ZPOOL} status ${POOL} | sed -n -e \ - "s,^[ $T]*\(.*$VDEV\(-part[0-9]\+\)\?\)[ $T]*\([A-Z]\+\).*,\1 \3,p" - return 0 +# 0: hot spare replacement successful +# 1: hot spare device not available +# 2: hot sparing disabled or threshold not reached +# 3: device already faulted or degraded +# 9: internal error + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +# Disabled by default. Enable in the zed.rc file. +: "${ZED_SPARE_ON_CHECKSUM_ERRORS:=0}" +: "${ZED_SPARE_ON_IO_ERRORS:=0}" + + +# query_vdev_status (pool, vdev) +# +# Given a [pool] and [vdev], return the matching vdev path & status on stdout. +# +# Warning: This function does not handle the case of [pool] or [vdev] +# containing whitespace. Beware of ShellCheck SC2046. Caveat emptor. +# +# Arguments +# pool: pool name +# vdev: virtual device name +# +# StdOut +# arg1: vdev pathname +# arg2: vdev status +# +query_vdev_status() +{ + local pool="$1" + local vdev="$2" + local t + + vdev="$(basename -- "${vdev}")" + ([ -n "${pool}" ] && [ -n "${vdev}" ]) || return + t="$(printf '\t')" + + "${ZPOOL}" status "${pool}" 2>/dev/null | sed -n -e \ + "s,^[ $t]*\(.*${vdev}\(-part[0-9]\+\)\?\)[ $t]*\([A-Z]\+\).*,\1 \3,p" \ + | tail -1 +} + + +# notify (old_vdev, new_vdev, num_errors) +# +# Send a notification regarding the hot spare replacement. +# +# Arguments +# old_vdev: path of old vdev that has failed +# new_vdev: path of new vdev used as the hot spare replacement +# num_errors: number of errors that triggered this replacement +# +notify() +{ + local old_vdev="$1" + local new_vdev="$2" + local num_errors="$3" + local note_subject + local note_pathname + local s + local rv + + umask 077 + note_subject="ZFS hot spare replacement for ${ZEVENT_POOL} on $(hostname)" + note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" + { + [ "${num_errors}" -ne 1 ] 2>/dev/null && s="s" + + echo "ZFS has replaced a failing device with a hot spare after" \ + "${num_errors} ${ZEVENT_SUBCLASS} error${s}:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + echo " old: ${old_vdev}" + echo " new: ${new_vdev}" + + "${ZPOOL}" status "${ZEVENT_POOL}" + + } > "${note_pathname}" + + zed_notify "${note_subject}" "${note_pathname}"; rv=$? + rm -f "${note_pathname}" + return "${rv}" } -# Fault devices after N I/O errors. -if [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.io" ]; then - ERRORS=`expr ${ZEVENT_VDEV_READ_ERRORS} + ${ZEVENT_VDEV_WRITE_ERRORS}` - - if [ ${ZED_SPARE_ON_IO_ERRORS} -gt 0 -a \ - ${ERRORS} -ge ${ZED_SPARE_ON_IO_ERRORS} ]; then - ACTION="fault" - fi -# Degrade devices after N checksum errors. -elif [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.checksum" ]; then - ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS} - - if [ ${ZED_SPARE_ON_CHECKSUM_ERRORS} -gt 0 -a \ - ${ERRORS} -ge ${ZED_SPARE_ON_CHECKSUM_ERRORS} ]; then - ACTION="degrade" - fi -else - ACTION= -fi - -if [ -n "${ACTION}" ]; then - - # Device is already FAULTED or DEGRADED - set -- `vdev_status ${ZEVENT_POOL} ${ZEVENT_VDEV_PATH}` - ZEVENT_VDEV_PATH_FOUND=$1 - STATUS=$2 - if [ "${STATUS}" = "FAULTED" -o "${STATUS}" = "DEGRADED" ]; then - exit 3 - fi - - # Step 1) FAULT or DEGRADE the device - # - ${ZINJECT} -d ${ZEVENT_VDEV_GUID} -A ${ACTION} ${ZEVENT_POOL} - - # Step 2) Set the SES fault beacon. - # - # XXX: Set the 'fault' or 'ident' beacon for the device. This can - # be done through the sg_ses utility, the only hard part is to map - # the sd device to its corresponding enclosure and slot. We may - # be able to leverage the existing vdev_id scripts for this. - # - # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3 - # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3 - - # Step 3) Replace the device with a hot spare. - # - # Round robin through the spares selecting those which are available. - # - for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do - set -- `vdev_status ${ZEVENT_POOL} ${SPARE}` - SPARE_VDEV_FOUND=$1 - STATUS=$2 - if [ "${STATUS}" = "AVAIL" ]; then - ${ZPOOL} replace ${ZEVENT_POOL} \ - ${ZEVENT_VDEV_GUID} ${SPARE_VDEV_FOUND} && exit 0 - fi - done - - exit 1 -fi - -exit 4 + +# main +# +# Arguments +# none +# +# Return +# see above +# +main() +{ + local num_errors + local action + local lockfile + local vdev_path + local vdev_status + local spare + local spare_path + local spare_status + local zpool_err + local zpool_rv + local rv + + # Avoid hot-sparing a hot-spare. + # + # Note: ZEVENT_VDEV_PATH is not defined for ZEVENT_VDEV_TYPE=spare. + # + [ "${ZEVENT_VDEV_TYPE}" = "spare" ] && exit 2 + + [ -n "${ZEVENT_POOL}" ] || exit 9 + [ -n "${ZEVENT_VDEV_GUID}" ] || exit 9 + [ -n "${ZEVENT_VDEV_PATH}" ] || exit 9 + + zed_check_cmd "${ZPOOL}" "${ZINJECT}" || exit 9 + + # Fault the device after a given number of I/O errors. + # + if [ "${ZEVENT_SUBCLASS}" = "io" ]; then + if [ "${ZED_SPARE_ON_IO_ERRORS}" -gt 0 ]; then + num_errors=$((ZEVENT_VDEV_READ_ERRORS + ZEVENT_VDEV_WRITE_ERRORS)) + [ "${num_errors}" -ge "${ZED_SPARE_ON_IO_ERRORS}" ] \ + && action="fault" + fi 2>/dev/null + + # Degrade the device after a given number of checksum errors. + # + elif [ "${ZEVENT_SUBCLASS}" = "checksum" ]; then + if [ "${ZED_SPARE_ON_CHECKSUM_ERRORS}" -gt 0 ]; then + num_errors="${ZEVENT_VDEV_CKSUM_ERRORS}" + [ "${num_errors}" -ge "${ZED_SPARE_ON_CHECKSUM_ERRORS}" ] \ + && action="degrade" + fi 2>/dev/null + + else + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 + fi + + # Error threshold not reached. + # + if [ -z "${action}" ]; then + exit 2 + fi + + lockfile="zed.spare.lock" + zed_lock "${lockfile}" + + # shellcheck disable=SC2046 + set -- $(query_vdev_status "${ZEVENT_POOL}" "${ZEVENT_VDEV_PATH}") + vdev_path="$1" + vdev_status="$2" + + # Device is already FAULTED or DEGRADED. + # + if [ "${vdev_status}" = "FAULTED" ] \ + || [ "${vdev_status}" = "DEGRADED" ]; then + rv=3 + + else + rv=1 + + # 1) FAULT or DEGRADE the device. + # + "${ZINJECT}" -d "${ZEVENT_VDEV_GUID}" -A "${action}" "${ZEVENT_POOL}" + + # 2) Set the SES fault beacon. + # + # TODO: Set the 'fault' or 'ident' beacon for the device. This can + # be done through the sg_ses utility. The only hard part is to map + # the sd device to its corresponding enclosure and slot. We may + # be able to leverage the existing vdev_id scripts for this. + # + # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3 + # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3 + + # 3) Replace the device with a hot spare. + # + # Round-robin through the spares trying those that are available. + # + for spare in ${ZEVENT_VDEV_SPARE_PATHS}; do + + # shellcheck disable=SC2046 + set -- $(query_vdev_status "${ZEVENT_POOL}" "${spare}") + spare_path="$1" + spare_status="$2" + + [ "${spare_status}" = "AVAIL" ] || continue + + zpool_err="$("${ZPOOL}" replace "${ZEVENT_POOL}" \ + "${ZEVENT_VDEV_GUID}" "${spare_path}" 2>&1)"; zpool_rv=$? + + if [ "${zpool_rv}" -ne 0 ]; then + [ -n "${zpool_err}" ] && zed_log_err "zpool ${zpool_err}" + else + notify "${vdev_path}" "${spare_path}" "${num_errors}" + rv=0 + break + fi + done + fi + + zed_unlock "${lockfile}" + exit "${rv}" +} + + +main "$@" diff --git a/cmd/zed/zed.d/resilver.finish-email.sh b/cmd/zed/zed.d/resilver.finish-email.sh deleted file mode 120000 index 1afad3258d5e..000000000000 --- a/cmd/zed/zed.d/resilver.finish-email.sh +++ /dev/null @@ -1 +0,0 @@ -scrub.finish-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/resilver.finish-notify.sh b/cmd/zed/zed.d/resilver.finish-notify.sh new file mode 120000 index 000000000000..2635dcce118b --- /dev/null +++ b/cmd/zed/zed.d/resilver.finish-notify.sh @@ -0,0 +1 @@ +scrub.finish-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/scrub.finish-email.sh b/cmd/zed/zed.d/scrub.finish-email.sh deleted file mode 100755 index d92ccfea1231..000000000000 --- a/cmd/zed/zed.d/scrub.finish-email.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH. -# By default, "zpool status" output will only be included in the email for -# a scrub.finish zevent if the pool is not healthy; to always include its -# output, set ZED_EMAIL_VERBOSE=1. -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 - -if test "${ZEVENT_SUBCLASS}" = "resilver.finish"; then - ACTION="resilvering" -elif test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then - ACTION="scrubbing" -else - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi - -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 - -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi -if ! test -x "${ZPOOL}"; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${ZPOOL}" not installed - exit 3 -fi - -# For scrub, suppress email if pool is healthy and verbosity is not enabled. -if test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then - HEALTHY=`"${ZPOOL}" status -x "${ZEVENT_POOL}" | \ - grep "'${ZEVENT_POOL}' is healthy"` - test -n "${HEALTHY}" -a "${ZED_EMAIL_VERBOSE:=0}" = 0 && exit 2 -fi - -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" < "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh new file mode 100644 index 000000000000..cc4f69667ee7 --- /dev/null +++ b/cmd/zed/zed.d/zed-functions.sh @@ -0,0 +1,392 @@ +# zed-functions.sh +# +# ZED helper functions for use in ZEDLETs + + +# Variable Defaults +# +: "${ZED_LOCKDIR:="/var/lock"}" +: "${ZED_NOTIFY_INTERVAL_SECS:=3600}" +: "${ZED_NOTIFY_VERBOSE:=0}" +: "${ZED_RUNDIR:="/var/run"}" +: "${ZED_SYSLOG_PRIORITY:="daemon.notice"}" +: "${ZED_SYSLOG_TAG:="zed"}" + +ZED_FLOCK_FD=8 + + +# zed_check_cmd (cmd, ...) +# +# For each argument given, search PATH for the executable command [cmd]. +# Log a message if [cmd] is not found. +# +# Arguments +# cmd: name of executable command for which to search +# +# Return +# 0 if all commands are found in PATH and are executable +# n for a count of the command executables that are not found +# +zed_check_cmd() +{ + local cmd + local rv=0 + + for cmd; do + if ! command -v "${cmd}" >/dev/null 2>&1; then + zed_log_err "\"${cmd}\" not installed" + rv=$((rv + 1)) + fi + done + return "${rv}" +} + + +# zed_log_msg (msg, ...) +# +# Write all argument strings to the system log. +# +# Globals +# ZED_SYSLOG_PRIORITY +# ZED_SYSLOG_TAG +# +# Return +# nothing +# +zed_log_msg() +{ + logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "$@" +} + + +# zed_log_err (msg, ...) +# +# Write an error message to the system log. This message will contain the +# script name, EID, and all argument strings. +# +# Globals +# ZED_SYSLOG_PRIORITY +# ZED_SYSLOG_TAG +# ZEVENT_EID +# +# Return +# nothing +# +zed_log_err() +{ + logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "error:" \ + "$(basename -- "$0"):" "${ZEVENT_EID:+"eid=${ZEVENT_EID}:"}" "$@" +} + + +# zed_lock (lockfile, [fd]) +# +# Obtain an exclusive (write) lock on [lockfile]. If the lock cannot be +# immediately acquired, wait until it becomes available. +# +# Every zed_lock() must be paired with a corresponding zed_unlock(). +# +# By default, flock-style locks associate the lockfile with file descriptor 8. +# The bash manpage warns that file descriptors >9 should be used with care as +# they may conflict with file descriptors used internally by the shell. File +# descriptor 9 is reserved for zed_rate_limit(). If concurrent locks are held +# within the same process, they must use different file descriptors (preferably +# decrementing from 8); otherwise, obtaining a new lock with a given file +# descriptor will release the previous lock associated with that descriptor. +# +# Arguments +# lockfile: pathname of the lock file; the lock will be stored in +# ZED_LOCKDIR unless the pathname contains a "/". +# fd: integer for the file descriptor used by flock (OPTIONAL unless holding +# concurrent locks) +# +# Globals +# ZED_FLOCK_FD +# ZED_LOCKDIR +# +# Return +# nothing +# +zed_lock() +{ + local lockfile="$1" + local fd="${2:-${ZED_FLOCK_FD}}" + local umask_bak + local err + + [ -n "${lockfile}" ] || return + if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then + lockfile="${ZED_LOCKDIR}/${lockfile}" + fi + + umask_bak="$(umask)" + umask 077 + + # Obtain a lock on the file bound to the given file descriptor. + # + eval "exec ${fd}> '${lockfile}'" + err="$(flock --exclusive "${fd}" 2>&1)" + if [ $? -ne 0 ]; then + zed_log_err "failed to lock \"${lockfile}\": ${err}" + fi + + umask "${umask_bak}" +} + + +# zed_unlock (lockfile, [fd]) +# +# Release the lock on [lockfile]. +# +# Arguments +# lockfile: pathname of the lock file +# fd: integer for the file descriptor used by flock (must match the file +# descriptor passed to the zed_lock function call) +# +# Globals +# ZED_FLOCK_FD +# ZED_LOCKDIR +# +# Return +# nothing +# +zed_unlock() +{ + local lockfile="$1" + local fd="${2:-${ZED_FLOCK_FD}}" + local err + + [ -n "${lockfile}" ] || return + if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then + lockfile="${ZED_LOCKDIR}/${lockfile}" + fi + + # Release the lock and close the file descriptor. + # + err="$(flock --unlock "${fd}" 2>&1)" + if [ $? -ne 0 ]; then + zed_log_err "failed to unlock \"${lockfile}\": ${err}" + fi + eval "exec ${fd}>&-" +} + + +# zed_notify (subject, pathname) +# +# Send a notification via all available methods. +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Return +# 0: notification succeeded via at least one method +# 1: notification failed +# 2: no notification methods configured +# +zed_notify() +{ + local subject="$1" + local pathname="$2" + local num_success=0 + local num_failure=0 + + zed_notify_email "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + + zed_notify_pushbullet "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + + [ "${num_success}" -gt 0 ] && return 0 + [ "${num_failure}" -gt 0 ] && return 1 + return 2 +} + + +# zed_notify_email (subject, pathname) +# +# Send a notification via email to the address specified by ZED_EMAIL. +# +# Requires the mail executable to be installed in the standard PATH. +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_EMAIL +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_email() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + + [ -n "${ZED_EMAIL}" ] || return 2 + + [ -n "${subject}" ] || return 1 + if [ ! -r "${pathname}" ]; then + zed_log_err "mail cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "mail" || return 1 + + mail -s "${subject}" "${ZED_EMAIL}" < "${pathname}" >/dev/null 2>&1; rv=$? + if [ "${rv}" -ne 0 ]; then + zed_log_err "mail exit=${rv}" + return 1 + fi + return 0 +} + + +# zed_notify_pushbullet (subject, pathname) +# +# Send a notification via Pushbullet . +# The access token (ZED_PUSHBULLET_ACCESS_TOKEN) identifies this client to the +# Pushbullet server. The optional channel tag (ZED_PUSHBULLET_CHANNEL_TAG) is +# for pushing to notification feeds that can be subscribed to; if a channel is +# not defined, push notifications will instead be sent to all devices +# associated with the account specified by the access token. +# +# Requires awk, curl, and sed executables to be installed in the standard PATH. +# +# References +# https://docs.pushbullet.com/ +# https://www.pushbullet.com/security +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_PUSHBULLET_ACCESS_TOKEN +# ZED_PUSHBULLET_CHANNEL_TAG +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_pushbullet() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + local msg_body + local msg_tag + local msg_json + local msg_out + local msg_err + local url="https://api.pushbullet.com/v2/pushes" + + [ -n "${ZED_PUSHBULLET_ACCESS_TOKEN}" ] || return 2 + + [ -n "${subject}" ] || return 1 + if [ ! -r "${pathname}" ]; then + zed_log_err "pushbullet cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "awk" "curl" "sed" || return 1 + + # Escape the following characters in the message body for JSON: + # newline, backslash, double quote, horizontal tab, vertical tab, + # and carriage return. + # + msg_body="$(awk '{ ORS="\\n" } { gsub(/\\/, "\\\\"); gsub(/"/, "\\\""); + gsub(/\t/, "\\t"); gsub(/\f/, "\\f"); gsub(/\r/, "\\r"); print }' \ + "${pathname}")" + + # Push to a channel if one is configured. + # + [ -n "${ZED_PUSHBULLET_CHANNEL_TAG}" ] && msg_tag="$(printf \ + '"channel_tag": "%s", ' "${ZED_PUSHBULLET_CHANNEL_TAG}")" + + # Construct the JSON message for pushing a note. + # + msg_json="$(printf '{%s"type": "note", "title": "%s", "body": "%s"}' \ + "${msg_tag}" "${subject}" "${msg_body}")" + + # Send the POST request and check for errors. + # + msg_out="$(curl -u "${ZED_PUSHBULLET_ACCESS_TOKEN}:" -X POST "${url}" \ + --header "Content-Type: application/json" --data-binary "${msg_json}" \ + 2>/dev/null)"; rv=$? + if [ "${rv}" -ne 0 ]; then + zed_log_err "curl exit=${rv}" + return 1 + fi + msg_err="$(echo "${msg_out}" \ + | sed -n -e 's/.*"error" *:.*"message" *: *"\([^"]*\)".*/\1/p')" + if [ -n "${msg_err}" ]; then + zed_log_err "pushbullet \"${msg_err}"\" + return 1 + fi + return 0 +} + + +# zed_rate_limit (tag, [interval]) +# +# Check whether an event of a given type [tag] has already occurred within the +# last [interval] seconds. +# +# This function obtains a lock on the statefile using file descriptor 9. +# +# Arguments +# tag: arbitrary string for grouping related events to rate-limit +# interval: time interval in seconds (OPTIONAL) +# +# Globals +# ZED_NOTIFY_INTERVAL_SECS +# ZED_RUNDIR +# +# Return +# 0 if the event should be processed +# 1 if the event should be dropped +# +# State File Format +# time;tag +# +zed_rate_limit() +{ + local tag="$1" + local interval="${2:-${ZED_NOTIFY_INTERVAL_SECS}}" + local lockfile="zed.zedlet.state.lock" + local lockfile_fd=9 + local statefile="${ZED_RUNDIR}/zed.zedlet.state" + local time_now + local time_prev + local umask_bak + local rv=0 + + [ -n "${tag}" ] || return 0 + + zed_lock "${lockfile}" "${lockfile_fd}" + time_now="$(date +%s)" + time_prev="$(egrep "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \ + | tail -1 | cut -d\; -f1)" + + if [ -n "${time_prev}" ] \ + && [ "$((time_now - time_prev))" -lt "${interval}" ]; then + rv=1 + else + umask_bak="$(umask)" + umask 077 + egrep -v "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \ + > "${statefile}.$$" + echo "${time_now};${tag}" >> "${statefile}.$$" + mv -f "${statefile}.$$" "${statefile}" + umask "${umask_bak}" + fi + + zed_unlock "${lockfile}" "${lockfile_fd}" + return "${rv}" +} diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 69989f95315b..c2336287f201 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -1,34 +1,80 @@ +## # zed.rc +# +# This file should be owned by root and permissioned 0600. +## +## # Absolute path to the debug output file. +# #ZED_DEBUG_LOG="/tmp/zed.debug.log" -# Email address of the zpool administrator. +## +# Email address of the zpool administrator for receipt of notifications. # Email will only be sent if ZED_EMAIL is defined. +# Disabled by default; uncomment to enable. +# #ZED_EMAIL="root" -# Email verbosity. -# If set to 0, suppress email if the pool is healthy. -# If set to 1, send email regardless of pool health. -#ZED_EMAIL_VERBOSE=0 - -# Minimum number of seconds between emails sent for a similar event. -#ZED_EMAIL_INTERVAL_SECS="3600" - +## # Default directory for zed lock files. +# #ZED_LOCKDIR="/var/lock" +## +# Minimum number of seconds between notifications for a similar event. +# +#ZED_NOTIFY_INTERVAL_SECS=3600 + +## +# Notification verbosity. +# If set to 0, suppress notification if the pool is healthy. +# If set to 1, send notification regardless of pool health. +# +#ZED_NOTIFY_VERBOSE=0 + +## +# Pushbullet access token. +# This grants full access to your account -- protect it accordingly! +# +# +# Disabled by default; uncomment to enable. +# +#ZED_PUSHBULLET_ACCESS_TOKEN="" + +## +# Pushbullet channel tag for push notification feeds that can be subscribed to. +# +# If not defined, push notifications will instead be sent to all devices +# associated with the account specified by the access token. +# Disabled by default; uncomment to enable. +# +#ZED_PUSHBULLET_CHANNEL_TAG="" + +## # Default directory for zed state files. +# #ZED_RUNDIR="/var/run" -# The syslog priority (eg, specified as a "facility.level" pair). +## +# Replace a device with a hot spare after N checksum errors are detected. +# Disabled by default; uncomment to enable. +# +#ZED_SPARE_ON_CHECKSUM_ERRORS=10 + +## +# Replace a device with a hot spare after N I/O errors are detected. +# Disabled by default; uncomment to enable. +# +#ZED_SPARE_ON_IO_ERRORS=1 + +## +# The syslog priority (e.g., specified as a "facility.level" pair). +# #ZED_SYSLOG_PRIORITY="daemon.notice" +## # The syslog tag for marking zed events. +# #ZED_SYSLOG_TAG="zed" -# Replace a device with a hot spare after N I/O errors are detected. -#ZED_SPARE_ON_IO_ERRORS=1 - -# Replace a device with a hot spare after N checksum errors are detected. -#ZED_SPARE_ON_CHECKSUM_ERRORS=10