Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zed syslog entries drop important info #10967

Merged
merged 4 commits into from
Oct 19, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions cmd/zed/zed.d/all-syslog.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,50 @@
#!/bin/sh
#
# Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
# Copyright (c) 2020 by Delphix. All rights reserved.
#

#
# Log the zevent via syslog.
#

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

zed_exit_if_ignoring_this_event

zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \
"${ZEVENT_POOL_GUID:+"pool_guid=${ZEVENT_POOL_GUID}"}" \
"${ZEVENT_VDEV_PATH:+"vdev_path=${ZEVENT_VDEV_PATH}"}" \
"${ZEVENT_VDEV_STATE_STR:+"vdev_state=${ZEVENT_VDEV_STATE_STR}"}"
# build a string of name=value pairs for this event
msg="eid=${ZEVENT_EID} class=${ZEVENT_SUBCLASS}"

if [ "${ZED_SYSLOG_DISPLAY_GUIDS}" = "1" ]; then
[ -n "${ZEVENT_POOL_GUID}" ] && msg="${msg} pool_guid=${ZEVENT_POOL_GUID}"
[ -n "${ZEVENT_VDEV_GUID}" ] && msg="${msg} vdev_guid=${ZEVENT_VDEV_GUID}"
else
[ -n "${ZEVENT_POOL}" ] && msg="${msg} pool='${ZEVENT_POOL}'"
[ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=$(basename "${ZEVENT_VDEV_PATH}")"
fi

# log pool state if state is anything other than 'ACTIVE'
[ -n "${ZEVENT_POOL_STATE_STR}" ] && [ "$ZEVENT_POOL_STATE" -ne 0 ] && \
msg="${msg} pool_state=${ZEVENT_POOL_STATE_STR}"

# Log the following payload nvpairs if they are present
[ -n "${ZEVENT_VDEV_STATE_STR}" ] && msg="${msg} vdev_state=${ZEVENT_VDEV_STATE_STR}"
[ -n "${ZEVENT_CKSUM_ALGORITHM}" ] && msg="${msg} algorithm=${ZEVENT_CKSUM_ALGORITHM}"
[ -n "${ZEVENT_ZIO_SIZE}" ] && msg="${msg} size=${ZEVENT_ZIO_SIZE}"
[ -n "${ZEVENT_ZIO_OFFSET}" ] && msg="${msg} offset=${ZEVENT_ZIO_OFFSET}"
[ -n "${ZEVENT_ZIO_PRIORITY}" ] && msg="${msg} priority=${ZEVENT_ZIO_PRIORITY}"
[ -n "${ZEVENT_ZIO_ERR}" ] && msg="${msg} err=${ZEVENT_ZIO_ERR}"
[ -n "${ZEVENT_ZIO_FLAGS}" ] && msg="${msg} flags=$(printf '0x%x' "${ZEVENT_ZIO_FLAGS}")"

# log delays that are >= 10 milisec
[ -n "${ZEVENT_ZIO_DELAY}" ] && [ "$ZEVENT_ZIO_DELAY" -gt 10000000 ] && \
msg="${msg} delay=$((ZEVENT_ZIO_DELAY / 1000000))ms"

# list the bookmark data together
[ -n "${ZEVENT_ZIO_OBJSET}" ] && \
msg="${msg} bookmark=${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}:${ZEVENT_ZIO_LEVEL}:${ZEVENT_ZIO_BLKID}"

zed_log_msg "${msg}"

exit 0
7 changes: 6 additions & 1 deletion cmd/zed/zed.d/zed.rc
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,10 @@ ZED_USE_ENCLOSURE_LEDS=1
# Otherwise, if ZED_SYSLOG_SUBCLASS_EXCLUDE is set, the
# matching subclasses are excluded from logging.
#ZED_SYSLOG_SUBCLASS_INCLUDE="checksum|scrub_*|vdev.*"
#ZED_SYSLOG_SUBCLASS_EXCLUDE="statechange|config_*|history_event"
ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"

##
# Use GUIDs instead of names when logging pool and vdevs
# Disabled by default, 1 to enable and 0 to disable.
#ZED_SYSLOG_DISPLAY_GUIDS=1

22 changes: 11 additions & 11 deletions tests/zfs-tests/tests/functional/events/events_001_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -94,43 +94,43 @@ run_and_verify -p "$MPOOL"\
-e "resource.fs.zfs.statechange" \
-e "sysevent.fs.zfs.config_sync" \
"zpool offline $MPOOL $VDEV1"
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "resource.fs.zfs.statechange" \
-e "sysevent.fs.zfs.vdev_online" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_finish" \
"zpool online $MPOOL $VDEV1"

# Attach then detach a device from the mirror.
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_attach" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_finish" \
"zpool attach $MPOOL $VDEV1 $VDEV4"
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_remove" \
-e "sysevent.fs.zfs.config_sync" \
"zpool detach $MPOOL $VDEV4"

# Replace a device
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.vdev_attach" \
-e "sysevent.fs.zfs.config_sync" \
-e "sysevent.fs.zfs.resilver_start" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.resilver_finish" \
-e "sysevent.fs.zfs.vdev_remove" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.config_sync" \
"zpool replace -f $MPOOL $VDEV1 $VDEV4"

# Scrub a pool.
run_and_verify -p "$MPOOL" -d 10 \
run_and_verify -p "$MPOOL" \
-e "sysevent.fs.zfs.scrub_start" \
-e "sysevent.fs.zfs.scrub_finish" \
-e "sysevent.fs.zfs.history_event" \
-e "sysevent.fs.zfs.scrub_finish" \
"zpool scrub $MPOOL"

# Export then import a pool
Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/functional/events/events_002_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ done

# 5. Start the ZED and verify it only handled the new missed events.
log_must zed_start
log_must file_wait $ZED_DEBUG_LOG 15
log_must file_wait $ZED_DEBUG_LOG 35 180
log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED

log_mustnot grep -q "sysevent.fs.zfs.pool_create" $TMP_EVENTS_ZED
Expand Down
58 changes: 44 additions & 14 deletions tests/zfs-tests/tests/functional/events/events_common.kshlib
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,35 @@
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
# Use is subject to license terms.
#
# Copyright (c) 2020 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events.cfg

#
# wait for 'event' to show up in the log 'file'
function file_wait_event # file event timeout
{
file=$1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be unused. Perhaps have it default to $ZED_DEBUG_LOG?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are still some external callers to file_wait that can possibly be converted to file_wait_event so I left the file input available.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think having an unused variable invites confusion. Even in my initial comment I didn't clearly understand what its purpose was intended to be.

event=$2
timeout=${3:-120}

SECONDS=0

until grep -q "^ZEVENT_CLASS=$event" $ZED_DEBUG_LOG ; do
if [[ $SECONDS -gt $timeout ]]; then
echo file_wait_event exceeded $SECONDS seconds
return 1
fi

sleep 1
((SECONDS=SECONDS+1))
don-brady marked this conversation as resolved.
Show resolved Hide resolved
done

return 0;
}

#
# Wait for up to 'timeout' seconds for the 'file' to settle, i.e.
# not be updated for a period of 'delay' seconds.
Expand All @@ -41,41 +66,35 @@ function file_wait # file delay timeout

while [ $(( $(date +%s) - $(stat -c %Y $file) )) -lt $delay ]; do
if [[ $SECONDS -gt $timeout ]]; then
echo file_wait exceeded $SECONDS seconds
return 1
fi

sleep 1
((SECONDS=SECONDS+1))
don-brady marked this conversation as resolved.
Show resolved Hide resolved
done

return 0;
}

function run_and_verify
{
typeset delay event pool zedlog
typeset event pool
set -A events

while getopts "d:e:p:z:" opt; do
don-brady marked this conversation as resolved.
Show resolved Hide resolved
case $opt in
d)
delay=$OPTARG
;;
don-brady marked this conversation as resolved.
Show resolved Hide resolved
e)
events[${#events[*]}+1]=$OPTARG
events+=("$OPTARG")
;;
p)
pool=$OPTARG
;;
z)
zedlog=$OPTARG
;;
esac
done
shift $(($OPTIND - 1))

pool=${pool:-$TESTPOOL}
delay=${delay:-3}
zedlog=${zedlog:-$ZED_DEBUG_LOG}
fullcmd="$1"
cmd=$(echo $fullcmd | awk '{print $1}')

Expand All @@ -87,21 +106,27 @@ function run_and_verify

# Remove any previous events from the logs.
log_must zpool events -c
log_must truncate -s 0 $zedlog
log_must truncate -s 0 $ZED_DEBUG_LOG

# Run the command as provided.
log_must eval "$fullcmd"

# Collect the new events and verify there are some.
log_must zpool sync -f
log_must file_wait $zedlog $delay
log_must cp $zedlog $TMP_EVENTS_ZED
log_must eval "zpool events >$TMP_EVENTS 2>/dev/null"
log_must eval "zpool events -v > $TMP_EVENTS_FULL 2>/dev/null"

log_must test -s $TMP_EVENTS
log_must test -s $TMP_EVENTS_FULL
log_must test -s $TMP_EVENTS_ZED

# If the only event is history then we don't observe zed debug log
if [[ "${events[0]}" != "sysevent.fs.zfs.history_event" ]]; then
# wait for the last event to show up in the debug log
log_must file_wait_event $ZED_DEBUG_LOG ${events[-1]}

log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED
log_must test -s $TMP_EVENTS_ZED
fi

log_note "Events generated:"
cat $TMP_EVENTS
Expand All @@ -118,6 +143,11 @@ function run_and_verify
$TMP_EVENTS_FULL >$TMP_EVENT_FULL
log_must grep -q "pool = \"$pool\"" $TMP_EVENT_FULL

# history events are filtered from all-debug.sh
don-brady marked this conversation as resolved.
Show resolved Hide resolved
if [[ "$event" == "sysevent.fs.zfs.history_event" ]]; then
continue
fi

# Verify the event was received by the ZED and logged.
awk -v event="$event" \
'BEGIN{FS="\n"; RS=""} $0 ~ event { print $0 }' \
Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ run_and_verify -p "$TESTPOOL" -e "sysevent.fs.zfs.history_event" \
log_note "Include multiple events"
zed_rc_set ZED_SYSLOG_SUBCLASS_INCLUDE 'scrub_start|scrub_finish'
run_and_verify -p "$TESTPOOL" -e "sysevent.fs.zfs.scrub_start" \
-e "sysevent.fs.zfs.scrub_finish" \
-e "sysevent.fs.zfs.scrub_finish" -d 8 \
don-brady marked this conversation as resolved.
Show resolved Hide resolved
"zpool scrub $TESTPOOL && wait_scrubbed $TESTPOOL"

# We can't use run_and_verify() for exclusions, so run the rest of the tests
Expand Down