Skip to content

Commit

Permalink
zed syslog entries drop important info
Browse files Browse the repository at this point in the history
ZED will log zevents summaries to the syslog, however the log entries 
tend to drop event details that can be useful for diagnosis. This is 
especially true for ereport events, like io, checksum, and delay.

Update the all-syslog.sh script to log additional event information.

Add an optional config option, ZED_SYSLOG_DISPLAY_GUIDS, to zed.rc
for choosing GUIDs over names for pool and vdev.

Change the default ZED_SYSLOG_SUBCLASS_EXCLUDE to exclude history_event 
events. These events tend to be frequent, convey no meaningful info, 
and are already logged in the zpool history.

Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Pavel Zakharov <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Don Brady <[email protected]>
Closes #10967
  • Loading branch information
Don Brady authored Oct 19, 2020
1 parent ab6a0e2 commit 13d6598
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 34 deletions.
44 changes: 40 additions & 4 deletions cmd/zed/zed.d/all-syslog.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,50 @@
#!/bin/sh
#
# Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
# Copyright (c) 2020 by Delphix. All rights reserved.
#

#
# Log the zevent via syslog.
#

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

zed_exit_if_ignoring_this_event

zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \
"${ZEVENT_POOL_GUID:+"pool_guid=${ZEVENT_POOL_GUID}"}" \
"${ZEVENT_VDEV_PATH:+"vdev_path=${ZEVENT_VDEV_PATH}"}" \
"${ZEVENT_VDEV_STATE_STR:+"vdev_state=${ZEVENT_VDEV_STATE_STR}"}"
# build a string of name=value pairs for this event
msg="eid=${ZEVENT_EID} class=${ZEVENT_SUBCLASS}"

if [ "${ZED_SYSLOG_DISPLAY_GUIDS}" = "1" ]; then
[ -n "${ZEVENT_POOL_GUID}" ] && msg="${msg} pool_guid=${ZEVENT_POOL_GUID}"
[ -n "${ZEVENT_VDEV_GUID}" ] && msg="${msg} vdev_guid=${ZEVENT_VDEV_GUID}"
else
[ -n "${ZEVENT_POOL}" ] && msg="${msg} pool='${ZEVENT_POOL}'"
[ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=$(basename "${ZEVENT_VDEV_PATH}")"
fi

# log pool state if state is anything other than 'ACTIVE'
[ -n "${ZEVENT_POOL_STATE_STR}" ] && [ "$ZEVENT_POOL_STATE" -ne 0 ] && \
msg="${msg} pool_state=${ZEVENT_POOL_STATE_STR}"

# Log the following payload nvpairs if they are present
[ -n "${ZEVENT_VDEV_STATE_STR}" ] && msg="${msg} vdev_state=${ZEVENT_VDEV_STATE_STR}"
[ -n "${ZEVENT_CKSUM_ALGORITHM}" ] && msg="${msg} algorithm=${ZEVENT_CKSUM_ALGORITHM}"
[ -n "${ZEVENT_ZIO_SIZE}" ] && msg="${msg} size=${ZEVENT_ZIO_SIZE}"
[ -n "${ZEVENT_ZIO_OFFSET}" ] && msg="${msg} offset=${ZEVENT_ZIO_OFFSET}"
[ -n "${ZEVENT_ZIO_PRIORITY}" ] && msg="${msg} priority=${ZEVENT_ZIO_PRIORITY}"
[ -n "${ZEVENT_ZIO_ERR}" ] && msg="${msg} err=${ZEVENT_ZIO_ERR}"
[ -n "${ZEVENT_ZIO_FLAGS}" ] && msg="${msg} flags=$(printf '0x%x' "${ZEVENT_ZIO_FLAGS}")"

# log delays that are >= 10 milisec
[ -n "${ZEVENT_ZIO_DELAY}" ] && [ "$ZEVENT_ZIO_DELAY" -gt 10000000 ] && \
msg="${msg} delay=$((ZEVENT_ZIO_DELAY / 1000000))ms"

# list the bookmark data together
[ -n "${ZEVENT_ZIO_OBJSET}" ] && \
msg="${msg} bookmark=${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}:${ZEVENT_ZIO_LEVEL}:${ZEVENT_ZIO_BLKID}"

zed_log_msg "${msg}"

exit 0
7 changes: 6 additions & 1 deletion cmd/zed/zed.d/zed.rc
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,10 @@ ZED_USE_ENCLOSURE_LEDS=1
# Otherwise, if ZED_SYSLOG_SUBCLASS_EXCLUDE is set, the
# matching subclasses are excluded from logging.
#ZED_SYSLOG_SUBCLASS_INCLUDE="checksum|scrub_*|vdev.*"
#ZED_SYSLOG_SUBCLASS_EXCLUDE="statechange|config_*|history_event"
ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"

##
# Use GUIDs instead of names when logging pool and vdevs
# Disabled by default, 1 to enable and 0 to disable.
#ZED_SYSLOG_DISPLAY_GUIDS=1

26 changes: 13 additions & 13 deletions tests/zfs-tests/tests/functional/events/events_001_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -94,43 +94,43 @@ run_and_verify -p "$MPOOL"\
-e "resource.fs.zfs.statechange" \
-e "sysevent.fs.zfs.config_sync" \
"zpool offline $MPOOL $VDEV1"
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "resource.fs.zfs.statechange" \
-e "sysevent.fs.zfs.vdev_online" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_finish" \
"zpool online $MPOOL $VDEV1"

# Attach then detach a device from the mirror.
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_attach" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.resilver_finish" \
"zpool attach $MPOOL $VDEV1 $VDEV4"
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_remove" \
-e "sysevent.fs.zfs.config_sync" \
"zpool detach $MPOOL $VDEV4"

# Replace a device
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_attach" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.vdev_remove" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
"zpool replace -f $MPOOL $VDEV1 $VDEV4"

# Scrub a pool.
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.scrub_start" \
-e "sysevent.fs.zfs.scrub_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.scrub_finish" \
"zpool scrub $MPOOL"

# Export then import a pool
Expand All @@ -139,9 +139,9 @@ run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.config_sync" \
"zpool export $MPOOL"
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.pool_import" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.pool_import" \
"zpool import -d $TEST_BASE_DIR $MPOOL"

# Destroy the pool
Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/functional/events/events_002_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ done

# 5. Start the ZED and verify it only handled the new missed events.
log_must zed_start
log_must file_wait $ZED_DEBUG_LOG 15
log_must file_wait $ZED_DEBUG_LOG 35
log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED

log_mustnot grep -q "sysevent.fs.zfs.pool_create" $TMP_EVENTS_ZED
Expand Down
61 changes: 46 additions & 15 deletions tests/zfs-tests/tests/functional/events/events_common.kshlib
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,34 @@
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
# Use is subject to license terms.
#
# Copyright (c) 2020 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events.cfg

#
# wait for 'event' to show up in the log 'file'
function file_wait_event # file event timeout
{
file=$1
event=$2
timeout=${3:-120}

SECONDS=0

until grep -q "^ZEVENT_CLASS=$event" $ZED_DEBUG_LOG ; do
if [[ $SECONDS -gt $timeout ]]; then
echo file_wait_event exceeded $SECONDS seconds
return 1
fi

sleep 1
done

return 0;
}

#
# Wait for up to 'timeout' seconds for the 'file' to settle, i.e.
# not be updated for a period of 'delay' seconds.
Expand All @@ -41,6 +65,7 @@ function file_wait # file delay timeout

while [ $(( $(date +%s) - $(stat -c %Y $file) )) -lt $delay ]; do
if [[ $SECONDS -gt $timeout ]]; then
echo file_wait exceeded $SECONDS seconds
return 1
fi

Expand All @@ -52,30 +77,22 @@ function file_wait # file delay timeout

function run_and_verify
{
typeset delay event pool zedlog
typeset event pool
set -A events

while getopts "d:e:p:z:" opt; do
while getopts "e:p:" opt; do
case $opt in
d)
delay=$OPTARG
;;
e)
events[${#events[*]}+1]=$OPTARG
events+=("$OPTARG")
;;
p)
pool=$OPTARG
;;
z)
zedlog=$OPTARG
;;
esac
done
shift $(($OPTIND - 1))

pool=${pool:-$TESTPOOL}
delay=${delay:-3}
zedlog=${zedlog:-$ZED_DEBUG_LOG}
fullcmd="$1"
cmd=$(echo $fullcmd | awk '{print $1}')

Expand All @@ -87,21 +104,30 @@ function run_and_verify

# Remove any previous events from the logs.
log_must zpool events -c
log_must truncate -s 0 $zedlog
log_must truncate -s 0 $ZED_DEBUG_LOG

# Run the command as provided.
log_must eval "$fullcmd"

# Collect the new events and verify there are some.
log_must zpool sync -f
log_must file_wait $zedlog $delay
log_must cp $zedlog $TMP_EVENTS_ZED
log_must eval "zpool events >$TMP_EVENTS 2>/dev/null"
log_must eval "zpool events -v > $TMP_EVENTS_FULL 2>/dev/null"

log_must test -s $TMP_EVENTS
log_must test -s $TMP_EVENTS_FULL
log_must test -s $TMP_EVENTS_ZED

# If the only event is history then we don't observe zed debug log
if [[ "${events[0]}" != "sysevent.fs.zfs.history_event" ]]; then
# wait for the last event to show up in the debug log
log_must file_wait_event $ZED_DEBUG_LOG ${events[-1]}

log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED
log_must test -s $TMP_EVENTS_ZED

log_note "Events logged:"
grep "^ZEVENT_CLASS" $TMP_EVENTS_ZED
fi

log_note "Events generated:"
cat $TMP_EVENTS
Expand All @@ -118,6 +144,11 @@ function run_and_verify
$TMP_EVENTS_FULL >$TMP_EVENT_FULL
log_must grep -q "pool = \"$pool\"" $TMP_EVENT_FULL

# all-debug.sh filters history events (seen in ZED_DEBUG_LOG)
if [[ "$event" == "sysevent.fs.zfs.history_event" ]]; then
continue
fi

# Verify the event was received by the ZED and logged.
awk -v event="$event" \
'BEGIN{FS="\n"; RS=""} $0 ~ event { print $0 }' \
Expand Down

0 comments on commit 13d6598

Please sign in to comment.