From 5a21214be84e4b00f419220e4da51f16cee13cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 10 Dec 2021 00:02:52 +0100 Subject: [PATCH 01/19] zfs, libzfs: diff: accept -h/ZFS_DIFF_NO_MANGLE, disabling path escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Rich Ercolani Signed-off-by: Ahelenia Ziemiańska Upstream-commit: 344bbc82e7054f61d5e7b3610b119820285fd2cb Closes #12829 --- cmd/zfs/zfs_main.c | 5 +- include/libzfs.h | 7 +-- include/libzfs_impl.h | 1 + lib/libzfs/libzfs_diff.c | 10 +++- man/man8/zfs-diff.8 | 6 ++- tests/runfiles/common.run | 2 +- .../functional/cli_root/zfs_diff/Makefile.am | 1 + .../cli_root/zfs_diff/zfs_diff_cliargs.ksh | 4 +- .../cli_root/zfs_diff/zfs_diff_mangle.ksh | 48 +++++++++++++++++++ 9 files changed, 74 insertions(+), 10 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index d0bb73a72513..d648aef7e71c 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -7672,7 +7672,7 @@ zfs_do_diff(int argc, char **argv) int c; struct sigaction sa; - while ((c = getopt(argc, argv, "FHt")) != -1) { + while ((c = getopt(argc, argv, "FHth")) != -1) { switch (c) { case 'F': flags |= ZFS_DIFF_CLASSIFY; @@ -7683,6 +7683,9 @@ zfs_do_diff(int argc, char **argv) case 't': flags |= ZFS_DIFF_TIMESTAMP; break; + case 'h': + flags |= ZFS_DIFF_NO_MANGLE; + break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); diff --git a/include/libzfs.h b/include/libzfs.h index eeb4daae723b..d55e3f2e7384 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -795,9 +795,10 @@ extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { - ZFS_DIFF_PARSEABLE = 0x1, - ZFS_DIFF_TIMESTAMP = 0x2, - ZFS_DIFF_CLASSIFY = 0x4 + ZFS_DIFF_PARSEABLE = 1 << 0, + 
ZFS_DIFF_TIMESTAMP = 1 << 1, + ZFS_DIFF_CLASSIFY = 1 << 2, + ZFS_DIFF_NO_MANGLE = 1 << 3 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index 96b11dad137c..043ff9cd77e4 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -234,6 +234,7 @@ typedef struct differ_info { boolean_t scripted; boolean_t classify; boolean_t timestamped; + boolean_t no_mangle; uint64_t shares; int zerr; int cleanupfd; diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index d46e23a2fc0e..b721a9fd9ec5 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -176,8 +176,13 @@ print_what(FILE *fp, mode_t what) static void print_cmn(FILE *fp, differ_info_t *di, const char *file) { - stream_bytes(fp, di->dsmnt); - stream_bytes(fp, file); + if (!di->no_mangle) { + stream_bytes(fp, di->dsmnt); + stream_bytes(fp, file); + } else { + (void) fputs(di->dsmnt, fp); + (void) fputs(file, fp); + } } static void @@ -752,6 +757,7 @@ zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, di.scripted = (flags & ZFS_DIFF_PARSEABLE); di.classify = (flags & ZFS_DIFF_CLASSIFY); di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); + di.no_mangle = (flags & ZFS_DIFF_NO_MANGLE); di.outputfd = outfd; di.datafd = pipefd[0]; diff --git a/man/man8/zfs-diff.8 b/man/man8/zfs-diff.8 index 49443bf47d17..a347f325203e 100644 --- a/man/man8/zfs-diff.8 +++ b/man/man8/zfs-diff.8 @@ -39,7 +39,7 @@ .Sh SYNOPSIS .Nm zfs .Cm diff -.Op Fl FHt +.Op Fl FHth .Ar snapshot Ar snapshot Ns | Ns Ar filesystem . .Sh DESCRIPTION @@ -92,6 +92,10 @@ Give more parsable tab-separated output, without header lines and without arrows. .It Fl t Display the path's inode change time as the first column of output. +.It Fl h +Do not +.Sy \e0 Ns Ar ooo Ns -escape +non-ASCII paths. .El . 
.Sh SEE ALSO diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index aefcd98436d7..19919a00afb3 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -178,7 +178,7 @@ tags = ['functional', 'cli_root', 'zfs_destroy'] [tests/functional/cli_root/zfs_diff] tests = ['zfs_diff_changes', 'zfs_diff_cliargs', 'zfs_diff_timestamp', - 'zfs_diff_types', 'zfs_diff_encrypted'] + 'zfs_diff_types', 'zfs_diff_encrypted', 'zfs_diff_mangle'] tags = ['functional', 'cli_root', 'zfs_diff'] [tests/functional/cli_root/zfs_get] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am index db90e058559d..bfb01dcb8f86 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/Makefile.am @@ -8,6 +8,7 @@ dist_pkgdata_SCRIPTS = \ zfs_diff_changes.ksh \ zfs_diff_cliargs.ksh \ zfs_diff_encrypted.ksh \ + zfs_diff_mangle.ksh \ zfs_diff_timestamp.ksh \ zfs_diff_types.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh index 7063bbe9ce6a..67eb18fa4a5d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_cliargs.ksh @@ -39,8 +39,8 @@ function cleanup log_assert "'zfs diff' should only work with supported options." 
log_onexit cleanup -typeset goodopts=("" "-F" "-H" "-t" "-FH" "-Ft" "-Ht" "-FHt") -typeset badopts=("-f" "-h" "-h" "-T" "-Fx" "-Ho" "-tT" "-") +typeset goodopts=("" "-h" "-t" "-th" "-H" "-Hh" "-Ht" "-Hth" "-F" "-Fh" "-Ft" "-Fth" "-FH" "-FHh" "-FHt" "-FHth") +typeset badopts=("-f" "-T" "-Fx" "-Ho" "-tT" "-") DATASET="$TESTPOOL/$TESTFS" TESTSNAP1="$DATASET@snap1" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh new file mode 100755 index 000000000000..ffce9f06848f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_diff/zfs_diff_mangle.ksh @@ -0,0 +1,48 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zfs diff' escapes filenames as expected, 'zfs diff -h' doesn't +# +# STRATEGY: +# 1. Prepare a dataset +# 2. Create some files +# 3. verify 'zfs diff' mangles them and 'zfs diff -h' doesn't +# + +verify_runnable "both" + +function cleanup +{ + log_must zfs destroy -r "$DATASET" +} + +log_assert "'zfs diff' mangles filenames, 'zfs diff -h' doesn't" +log_onexit cleanup + +DATASET="$TESTPOOL/$TESTFS/fs" +TESTSNAP1="$DATASET@snap1" + +# 1. 
Prepare a dataset +log_must zfs create "$DATASET" +MNTPOINT="$(get_prop mountpoint "$DATASET")" +log_must zfs snapshot "$TESTSNAP1" + +printf '%c\t'"$MNTPOINT/"'%s\n' M '' + 'śmieszny żupan' + 'достопримечательности' | sort > "$MNTPOINT/śmieszny żupan" +printf '%c\t'"$MNTPOINT/"'%s\n' M '' + '\0305\0233mieszny\0040\0305\0274upan' + '\0320\0264\0320\0276\0321\0201\0321\0202\0320\0276\0320\0277\0321\0200\0320\0270\0320\0274\0320\0265\0321\0207\0320\0260\0321\0202\0320\0265\0320\0273\0321\0214\0320\0275\0320\0276\0321\0201\0321\0202\0320\0270' | sort > "$MNTPOINT/достопримечательности" +log_must diff -u <(zfs diff -h "$TESTSNAP1" | grep -vF '' | sort) "$MNTPOINT/śmieszny żupan" +log_must diff -u <(zfs diff "$TESTSNAP1" | grep -vF '' | sort) "$MNTPOINT/достопримечательности" + +log_pass "'zfs diff' mangles filenames, 'zfs diff -h' doesn't" From 7fbb90feeab7b8a61875e9ba1ec136125e8177cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 9 Dec 2021 23:42:02 +0100 Subject: [PATCH 02/19] libzfs: diff: stream_bytes: use fputc, %hho formats chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Rich Ercolani Signed-off-by: Ahelenia Ziemiańska Upstream-commit: a72129edcb7353f5301096ea9541e8e345f052d4 Closes #12829 --- lib/libzfs/libzfs_diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index b721a9fd9ec5..57a7c1556599 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -122,9 +122,9 @@ stream_bytes(FILE *fp, const char *string) while ((c = *string++) != '\0') { if (c > ' ' && c != '\\' && c < '\177') { - (void) fprintf(fp, "%c", c); + (void) fputc(c, fp); } else { - (void) fprintf(fp, "\\%04o", (uint8_t)c); + (void) fprintf(fp, "\\%04hho", (uint8_t)c); } } } From fe6f2651f55de3bf68ac4729386b5e85aa23a447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 
30 Nov 2021 17:29:50 +0100 Subject: [PATCH 03/19] etc/systemd/zfs-mount-generator: serialise, handle keylocation=http[s]:// MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * etc/systemd/zfs-mount-generator: serialise The wins for a relatively normal workload are rather slim: real 0.02119s/0.00985s=2.15029x user 0.02130s/0.00346s=6.15560x sys 0.03858s/0.00643s=6.00062x wall-total 0.014518s/0.005925s=2.45009x wall-init 0.014518s/0.002457s=5.90684x wall-real 0.014518s/0.003467s=4.18668x But this is a big win on machines with a lot of datasets and expensive forks. For example, the gain on a VM on my work laptop with 900+ legacy-mount Docker datasets, the original gains from the C rewrite were only five-fold: real 0.516s/0.102s=5.05882x user 0.237s/0.143s=1.65734x sys 0.287s/0.100s=2.87x And this serial variant gains this back there as well: real 0.102s/0.008s=12.75x user 0.143s/0.007s=20.42857 sys 0.100s/0.001s=100x wall-total 0.09717s/0.00319s=30.40255x wall-init 0.00203s/0.00200s=1.015941x wall-real 0.09513s/0.00118s=80.02043x For a total of real 0.516s/0.008s=64.5x user 0.237s/0.007s=33.85714x sys 0.287s/0.001s=287x Suggested-by: Richard Laager * etc/systemd/zfs-mount-generator: pull in network for keylocation=https Also simplify RequiresMountsFor= handling Ref: #11956 Reviewed-by: Richard Laager Reviewed-by: Tony Nguyen Signed-off-by: Ahelenia Ziemiańska Upstream-commit: 4325de09cd2993837bc32a83d61872b57e58298e Closes #12138 --- .../system-generators/zfs-mount-generator.c | 437 +++++++----------- man/man8/zfs-mount-generator.8.in | 19 +- 2 files changed, 181 insertions(+), 275 deletions(-) diff --git a/etc/systemd/system-generators/zfs-mount-generator.c b/etc/systemd/system-generators/zfs-mount-generator.c index b806339deb2f..f4c6c26a0b34 100644 --- a/etc/systemd/system-generators/zfs-mount-generator.c +++ b/etc/systemd/system-generators/zfs-mount-generator.c @@ -27,9 +27,6 @@ #include #include #include -#include -#include 
-#include #include #include #include @@ -44,25 +41,16 @@ #include #include -#define STRCMP ((int(*)(const void *, const void *))&strcmp) -#define PID_T_CMP ((int(*)(const void *, const void *))&pid_t_cmp) - -static int -pid_t_cmp(const pid_t *lhs, const pid_t *rhs) -{ - /* - * This is always valid, quoth sys_types.h(7posix): - * > blksize_t, pid_t, and ssize_t shall be signed integer types. - */ - return (*lhs - *rhs); -} +/* + * For debugging only. + * + * Free statics with trivial life-times, + * but saved line filenames are replaced with a static string. + */ +#define FREE_STATICS false -#define EXIT_ENOMEM() \ - do { \ - fprintf(stderr, PROGNAME "[%d]: " \ - "not enough memory (L%d)!\n", getpid(), __LINE__); \ - _exit(1); \ - } while (0) +#define nitems(arr) (sizeof (arr) / sizeof (*arr)) +#define STRCMP ((int(*)(const void *, const void *))&strcmp) #define PROGNAME "zfs-mount-generator" @@ -80,20 +68,11 @@ pid_t_cmp(const pid_t *lhs, const pid_t *rhs) #define URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$" static regex_t uri_regex; -static char *argv0; - static const char *destdir = "/tmp"; static int destdir_fd = -1; static void *known_pools = NULL; /* tsearch() of C strings */ -static struct { - sem_t noauto_not_on_sem; - - sem_t noauto_names_sem; - size_t noauto_names_len; - size_t noauto_names_max; - char noauto_names[][NAME_MAX]; -} *noauto_files; +static void *noauto_files = NULL; /* tsearch() of C strings */ static char * @@ -103,8 +82,12 @@ systemd_escape(const char *input, const char *prepend, const char *append) size_t applen = strlen(append); size_t prelen = strlen(prepend); char *ret = malloc(4 * len + prelen + applen + 1); - if (!ret) - EXIT_ENOMEM(); + if (!ret) { + fprintf(stderr, PROGNAME "[%d]: " + "out of memory to escape \"%s%s%s\"!\n", + getpid(), prepend, input, append); + return (NULL); + } memcpy(ret, prepend, prelen); char *out = ret + prelen; @@ -166,8 +149,12 @@ systemd_escape_path(char *input, const char *prepend, 
const char *append) { if (strcmp(input, "/") == 0) { char *ret; - if (asprintf(&ret, "%s-%s", prepend, append) == -1) - EXIT_ENOMEM(); + if (asprintf(&ret, "%s-%s", prepend, append) == -1) { + fprintf(stderr, PROGNAME "[%d]: " + "out of memory to escape \"%s%s%s\"!\n", + getpid(), prepend, input, append); + ret = NULL; + } return (ret); } else { /* @@ -209,6 +196,10 @@ fopenat(int dirfd, const char *pathname, int flags, static int line_worker(char *line, const char *cachefile) { + int ret = 0; + void *tofree_all[8]; + void **tofree = tofree_all; + char *toktmp; /* BEGIN CSTYLED */ const char *dataset = strtok_r(line, "\t", &toktmp); @@ -240,11 +231,9 @@ line_worker(char *line, const char *cachefile) if (p_nbmand == NULL) { fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n", getpid(), dataset); - return (1); + goto err; } - strncpy(argv0, dataset, strlen(argv0)); - /* Minimal pre-requisites to mount a ZFS dataset */ const char *after = "zfs-import.target"; const char *wants = "zfs-import.target"; @@ -280,28 +269,31 @@ line_worker(char *line, const char *cachefile) if (strcmp(p_encroot, "-") != 0) { - char *keyloadunit = + char *keyloadunit = *(tofree++) = systemd_escape(p_encroot, "zfs-load-key@", ".service"); + if (keyloadunit == NULL) + goto err; if (strcmp(dataset, p_encroot) == 0) { const char *keymountdep = NULL; bool is_prompt = false; + bool need_network = false; regmatch_t uri_matches[3]; if (regexec(&uri_regex, p_keyloc, - sizeof (uri_matches) / sizeof (*uri_matches), - uri_matches, 0) == 0) { + nitems(uri_matches), uri_matches, 0) == 0) { + p_keyloc[uri_matches[1].rm_eo] = '\0'; p_keyloc[uri_matches[2].rm_eo] = '\0'; + const char *scheme = + &p_keyloc[uri_matches[1].rm_so]; const char *path = &p_keyloc[uri_matches[2].rm_so]; - /* - * Assumes all URI keylocations need - * the mount for their path; - * http://, for example, wouldn't - * (but it'd need network-online.target et al.) 
- */ - keymountdep = path; + if (strcmp(scheme, "https") == 0 || + strcmp(scheme, "http") == 0) + need_network = true; + else + keymountdep = path; } else { if (strcmp(p_keyloc, "prompt") != 0) fprintf(stderr, PROGNAME "[%d]: %s: " @@ -321,7 +313,7 @@ line_worker(char *line, const char *cachefile) "couldn't open %s under %s: %s\n", getpid(), dataset, keyloadunit, destdir, strerror(errno)); - return (1); + goto err; } fprintf(keyloadunit_f, @@ -335,20 +327,22 @@ line_worker(char *line, const char *cachefile) "After=%s\n", dataset, cachefile, wants, after); + if (need_network) + fprintf(keyloadunit_f, + "Wants=network-online.target\n" + "After=network-online.target\n"); + if (p_systemd_requires) fprintf(keyloadunit_f, "Requires=%s\n", p_systemd_requires); - if (p_systemd_requiresmountsfor || keymountdep) { - fprintf(keyloadunit_f, "RequiresMountsFor="); - if (p_systemd_requiresmountsfor) - fprintf(keyloadunit_f, - "%s ", p_systemd_requiresmountsfor); - if (keymountdep) - fprintf(keyloadunit_f, - "'%s'", keymountdep); - fprintf(keyloadunit_f, "\n"); - } + if (p_systemd_requiresmountsfor) + fprintf(keyloadunit_f, + "RequiresMountsFor=%s\n", + p_systemd_requiresmountsfor); + if (keymountdep) + fprintf(keyloadunit_f, + "RequiresMountsFor='%s'\n", keymountdep); /* BEGIN CSTYLED */ fprintf(keyloadunit_f, @@ -393,9 +387,13 @@ line_worker(char *line, const char *cachefile) if (after[0] == '\0') after = keyloadunit; else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1) - after = toktmp; - else - EXIT_ENOMEM(); + after = *(tofree++) = toktmp; + else { + fprintf(stderr, PROGNAME "[%d]: %s: " + "out of memory to generate after=\"%s %s\"!\n", + getpid(), dataset, after, keyloadunit); + goto err; + } } @@ -404,12 +402,12 @@ line_worker(char *line, const char *cachefile) strcmp(p_systemd_ignore, "off") == 0) { /* ok */ } else if (strcmp(p_systemd_ignore, "on") == 0) - return (0); + goto end; else { fprintf(stderr, PROGNAME "[%d]: %s: " "invalid 
org.openzfs.systemd:ignore=%s\n", getpid(), dataset, p_systemd_ignore); - return (1); + goto err; } /* Check for canmount */ @@ -418,21 +416,21 @@ line_worker(char *line, const char *cachefile) } else if (strcmp(p_canmount, "noauto") == 0) noauto = true; else if (strcmp(p_canmount, "off") == 0) - return (0); + goto end; else { fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n", getpid(), dataset, p_canmount); - return (1); + goto err; } /* Check for legacy and blank mountpoints */ if (strcmp(p_mountpoint, "legacy") == 0 || strcmp(p_mountpoint, "none") == 0) - return (0); + goto end; else if (p_mountpoint[0] != '/') { fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n", getpid(), dataset, p_mountpoint); - return (1); + goto err; } /* Escape the mountpoint per systemd policy */ @@ -442,7 +440,7 @@ line_worker(char *line, const char *cachefile) fprintf(stderr, PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n", getpid(), dataset, p_mountpoint); - return (1); + goto err; } @@ -552,8 +550,7 @@ line_worker(char *line, const char *cachefile) * files if we're sure they were created by us. (see 5.) * 2. We handle files differently based on canmount. * Units with canmount=on always have precedence over noauto. - * This is enforced by the noauto_not_on_sem semaphore, - * which is only unlocked when the last canmount=on process exits. + * This is enforced by processing these units before all others. * It is important to use p_canmount and not noauto here, * since we categorise by canmount while other properties, * e.g. org.openzfs.systemd:wanted-by, also modify noauto. @@ -561,7 +558,7 @@ line_worker(char *line, const char *cachefile) * Additionally, we use noauto_files to track the unit file names * (which are the systemd-escaped mountpoints) of all (exclusively) * noauto datasets that had a file created. - * 4. If the file to be created is found in the tracking array, + * 4. 
If the file to be created is found in the tracking tree, * we do NOT create it. * 5. If a file exists for a noauto dataset, * we check whether the file name is in the array. @@ -571,29 +568,14 @@ line_worker(char *line, const char *cachefile) * further noauto datasets creating a file for this path again. */ - { - sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ? - &noauto_files->noauto_names_sem : - &noauto_files->noauto_not_on_sem; - while (sem_wait(our_sem) == -1 && errno == EINTR) - ; - } - struct stat stbuf; bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0; + bool is_known = tfind(mountfile, &noauto_files, STRCMP) != NULL; - bool is_known = false; - for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) { - if (strncmp( - noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) { - is_known = true; - break; - } - } - + *(tofree++) = (void *)mountfile; if (already_exists) { if (is_known) { - /* If it's in $noauto_files, we must be noauto too */ + /* If it's in noauto_files, we must be noauto too */ /* See 5 */ errno = 0; @@ -614,43 +596,31 @@ line_worker(char *line, const char *cachefile) } /* File exists: skip current dataset */ - if (strcmp(p_canmount, "on") == 0) - sem_post(&noauto_files->noauto_names_sem); - return (0); + goto end; } else { if (is_known) { /* See 4 */ - if (strcmp(p_canmount, "on") == 0) - sem_post(&noauto_files->noauto_names_sem); - return (0); + goto end; } else if (strcmp(p_canmount, "noauto") == 0) { - if (noauto_files->noauto_names_len == - noauto_files->noauto_names_max) + if (tsearch(mountfile, &noauto_files, STRCMP) == NULL) fprintf(stderr, PROGNAME "[%d]: %s: " - "noauto dataset limit (%zu) reached! " - "Not tracking %s. 
Please report this to " - "https://github.com/openzfs/zfs\n", - getpid(), dataset, - noauto_files->noauto_names_max, mountfile); - else { - strncpy(noauto_files->noauto_names[ - noauto_files->noauto_names_len], - mountfile, NAME_MAX); - ++noauto_files->noauto_names_len; - } + "out of memory for noauto datasets! " + "Not tracking %s.\n", + getpid(), dataset, mountfile); + else + /* mountfile escaped to noauto_files */ + *(--tofree) = NULL; } } FILE *mountfile_f = fopenat(destdir_fd, mountfile, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644); - if (strcmp(p_canmount, "on") == 0) - sem_post(&noauto_files->noauto_names_sem); if (!mountfile_f) { fprintf(stderr, PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n", getpid(), dataset, mountfile, destdir, strerror(errno)); - return (1); + goto err; } fprintf(mountfile_f, @@ -699,12 +669,17 @@ line_worker(char *line, const char *cachefile) (void) fclose(mountfile_f); if (!requiredby && !wantedby) - return (0); + goto end; /* Finally, create the appropriate dependencies */ char *linktgt; - if (asprintf(&linktgt, "../%s", mountfile) == -1) - EXIT_ENOMEM(); + if (asprintf(&linktgt, "../%s", mountfile) == -1) { + fprintf(stderr, PROGNAME "[%d]: %s: " + "out of memory for dependents of %s!\n", + getpid(), dataset, mountfile); + goto err; + } + *(tofree++) = linktgt; char *dependencies[][2] = { {"wants", wantedby}, @@ -719,8 +694,14 @@ line_worker(char *line, const char *cachefile) reqby; reqby = strtok_r(NULL, " ", &toktmp)) { char *depdir; - if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1) - EXIT_ENOMEM(); + if (asprintf( + &depdir, "%s.%s", reqby, (*dep)[0]) == -1) { + fprintf(stderr, PROGNAME "[%d]: %s: " + "out of memory for dependent dir name " + "\"%s.%s\"!\n", + getpid(), dataset, reqby, (*dep)[0]); + continue; + } (void) mkdirat(destdir_fd, depdir, 0755); int depdir_fd = openat(destdir_fd, depdir, @@ -746,7 +727,24 @@ line_worker(char *line, const char *cachefile) } } - return (0); +end: + if (tofree >= 
tofree_all + nitems(tofree_all)) { + /* + * This won't happen as-is: + * we've got 8 slots and allocate 4 things at most. + */ + fprintf(stderr, + PROGNAME "[%d]: %s: need to free %zu > %zu!\n", + getpid(), dataset, tofree - tofree_all, nitems(tofree_all)); + ret = tofree - tofree_all; + } + + while (tofree-- != tofree_all) + free(*tofree); + return (ret); +err: + ret = 1; + goto end; } @@ -780,12 +778,11 @@ main(int argc, char **argv) if (kmfd >= 0) { (void) dup2(kmfd, STDERR_FILENO); (void) close(kmfd); + + setlinebuf(stderr); } } - uint8_t debug = 0; - - argv0 = argv[0]; switch (argc) { case 1: /* Use default */ @@ -844,33 +841,9 @@ main(int argc, char **argv) } } - { - /* - * We could just get a gigabyte here and Not Care, - * but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored - * and we'd try (and likely fail) to rip it out of swap - */ - noauto_files = mmap(NULL, 4 * 1024 * 1024, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - if (noauto_files == MAP_FAILED) { - fprintf(stderr, - PROGNAME "[%d]: couldn't allocate IPC region: %s\n", - getpid(), strerror(errno)); - _exit(1); - } - - sem_init(&noauto_files->noauto_not_on_sem, true, 0); - sem_init(&noauto_files->noauto_names_sem, true, 1); - noauto_files->noauto_names_len = 0; - /* Works out to 16447ish, *well* enough */ - noauto_files->noauto_names_max = - (4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX; - } - + bool debug = false; char *line = NULL; size_t linelen = 0; - struct timespec time_start = {}; { const char *dbgenv = getenv("ZFS_DEBUG"); if (dbgenv) @@ -879,7 +852,7 @@ main(int argc, char **argv) FILE *cmdline = fopen("/proc/cmdline", "re"); if (cmdline != NULL) { if (getline(&line, &linelen, cmdline) >= 0) - debug = strstr(line, "debug") ? 
2 : 0; + debug = strstr(line, "debug"); (void) fclose(cmdline); } } @@ -888,19 +861,17 @@ main(int argc, char **argv) dup2(STDERR_FILENO, STDOUT_FILENO); } - size_t forked_canmount_on = 0; - size_t forked_canmount_not_on = 0; - size_t canmount_on_pids_len = 128; - pid_t *canmount_on_pids = - malloc(canmount_on_pids_len * sizeof (*canmount_on_pids)); - if (canmount_on_pids == NULL) - canmount_on_pids_len = 0; - + struct timespec time_start = {}; if (debug) clock_gettime(CLOCK_MONOTONIC_RAW, &time_start); - ssize_t read; - pid_t pid; + struct line { + char *line; + const char *fname; + struct line *next; + } *lines_canmount_not_on = NULL; + + int ret = 0; struct dirent *cachent; while ((cachent = readdir(fslist_dir)) != NULL) { if (strcmp(cachent->d_name, ".") == 0 || @@ -916,129 +887,67 @@ main(int argc, char **argv) continue; } + const char *filename = FREE_STATICS ? "(elided)" : NULL; + + ssize_t read; while ((read = getline(&line, &linelen, cachefile)) >= 0) { line[read - 1] = '\0'; /* newline */ - switch (pid = fork()) { - case -1: - fprintf(stderr, - PROGNAME "[%d]: couldn't fork for %s: %s\n", - getpid(), line, strerror(errno)); - break; - case 0: /* child */ - _exit(line_worker(line, cachent->d_name)); - default: { /* parent */ - char *tmp; - char *dset = strtok_r(line, "\t", &tmp); - strtok_r(NULL, "\t", &tmp); - char *canmount = strtok_r(NULL, "\t", &tmp); - bool canmount_on = - canmount && strncmp(canmount, "on", 2) == 0; - - if (debug >= 2) - printf(PROGNAME ": forked %d, " - "canmount_on=%d, dataset=%s\n", - (int)pid, canmount_on, dset); - - if (canmount_on && - forked_canmount_on == - canmount_on_pids_len) { - size_t new_len = - (canmount_on_pids_len ?: 16) * 2; - void *new_pidlist = - realloc(canmount_on_pids, - new_len * - sizeof (*canmount_on_pids)); - if (!new_pidlist) { - fprintf(stderr, - PROGNAME "[%d]: " - "out of memory! 
" - "Mount ordering may be " - "affected.\n", getpid()); - continue; - } - - canmount_on_pids = new_pidlist; - canmount_on_pids_len = new_len; - } + char *canmount = line; + canmount += strcspn(canmount, "\t"); + canmount += strspn(canmount, "\t"); + canmount += strcspn(canmount, "\t"); + canmount += strspn(canmount, "\t"); + bool canmount_on = strncmp(canmount, "on", 2) == 0; - if (canmount_on) { - canmount_on_pids[forked_canmount_on] = - pid; - ++forked_canmount_on; - } else - ++forked_canmount_not_on; - break; - } + if (canmount_on) + ret |= line_worker(line, cachent->d_name); + else { + if (filename == NULL) + filename = + strdup(cachent->d_name) ?: "(?)"; + + struct line *l = calloc(1, sizeof (*l)); + char *nl = strdup(line); + if (l == NULL || nl == NULL) { + fprintf(stderr, PROGNAME "[%d]: " + "out of memory for \"%s\" in %s\n", + getpid(), line, cachent->d_name); + free(l); + free(nl); + continue; + } + l->line = nl; + l->fname = filename; + l->next = lines_canmount_not_on; + lines_canmount_not_on = l; } } - (void) fclose(cachefile); + fclose(cachefile); } free(line); - if (forked_canmount_on == 0) { - /* No canmount=on processes to finish, so don't deadlock here */ - for (size_t i = 0; i < forked_canmount_not_on; ++i) - sem_post(&noauto_files->noauto_not_on_sem); - } else { - /* Likely a no-op, since we got these from a narrow fork loop */ - qsort(canmount_on_pids, forked_canmount_on, - sizeof (*canmount_on_pids), PID_T_CMP); - } + while (lines_canmount_not_on) { + struct line *l = lines_canmount_not_on; + lines_canmount_not_on = l->next; - int status, ret = 0; - struct rusage usage; - size_t forked_canmount_on_max = forked_canmount_on; - while ((pid = wait4(-1, &status, 0, &usage)) != -1) { - ret |= WEXITSTATUS(status) | WTERMSIG(status); - - if (forked_canmount_on != 0) { - if (bsearch(&pid, canmount_on_pids, - forked_canmount_on_max, sizeof (*canmount_on_pids), - PID_T_CMP)) - --forked_canmount_on; - - if (forked_canmount_on == 0) { - /* - * All 
canmount=on processes have finished, - * let all the lower-priority ones finish now - */ - for (size_t i = 0; - i < forked_canmount_not_on; ++i) - sem_post( - &noauto_files->noauto_not_on_sem); - } + ret |= line_worker(l->line, l->fname); + if (FREE_STATICS) { + free(l->line); + free(l); } - - if (debug >= 2) - printf(PROGNAME ": %d done, user=%llu.%06us, " - "system=%llu.%06us, maxrss=%ldB, ex=0x%x\n", - (int)pid, - (unsigned long long) usage.ru_utime.tv_sec, - (unsigned int) usage.ru_utime.tv_usec, - (unsigned long long) usage.ru_stime.tv_sec, - (unsigned int) usage.ru_stime.tv_usec, - usage.ru_maxrss * 1024, status); } if (debug) { struct timespec time_end = {}; clock_gettime(CLOCK_MONOTONIC_RAW, &time_end); + struct rusage usage; getrusage(RUSAGE_SELF, &usage); printf( "\n" - PROGNAME ": self : " - "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n", - (unsigned long long) usage.ru_utime.tv_sec, - (unsigned int) usage.ru_utime.tv_usec, - (unsigned long long) usage.ru_stime.tv_sec, - (unsigned int) usage.ru_stime.tv_usec, - usage.ru_maxrss * 1024); - - getrusage(RUSAGE_CHILDREN, &usage); - printf(PROGNAME ": children: " + PROGNAME ": " "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n", (unsigned long long) usage.ru_utime.tv_sec, (unsigned int) usage.ru_utime.tv_usec, @@ -1068,7 +977,7 @@ main(int argc, char **argv) time_init.tv_nsec / 1000000000; time_init.tv_nsec %= 1000000000; - printf(PROGNAME ": wall : " + printf(PROGNAME ": " "total=%llu.%09llus = " "init=%llu.%09llus + real=%llu.%09llus\n", (unsigned long long) time_init.tv_sec, @@ -1077,7 +986,15 @@ main(int argc, char **argv) (unsigned long long) time_start.tv_nsec, (unsigned long long) time_end.tv_sec, (unsigned long long) time_end.tv_nsec); + + fflush(stdout); } + if (FREE_STATICS) { + closedir(fslist_dir); + tdestroy(noauto_files, free); + tdestroy(known_pools, free); + regfree(&uri_regex); + } _exit(ret); } diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index 
7aa332ba8174..ae8937038e67 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -142,22 +142,11 @@ ZEDLET, if enabled .Pq see Xr zed 8 . . .Sh ENVIRONMENT -The +If the .Sy ZFS_DEBUG -environment variable can either be -.Sy 0 -(default), -.Sy 1 -(print summary accounting information at the end), or at least -.Sy 2 -(print accounting information for each subprocess as it finishes). -. -If not present, -.Pa /proc/cmdline -is additionally checked for -.Qq debug , -in which case the debug level is set to -.Sy 2 . +environment variable is nonzero +.Pq or unset and Pa /proc/cmdline No contains Qq Sy debug , +print summary accounting information at the end. . .Sh EXAMPLES To begin, enable tracking for the pool: From 9f6943504aec36f897f814fb7ae5987425436b11 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 30 Nov 2021 10:38:09 -0800 Subject: [PATCH 04/19] Default to zfs_dmu_offset_next_sync=1 Strict hole reporting was previously disabled by default as a performance optimization. However, this has led to confusion over the expected behavior and a variety of workarounds being adopted by consumers of ZFS. Change the default behavior to always report holes and force the TXG sync. Reviewed-by: Matthew Ahrens Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Upstream-commit: 05b3eb6d232009db247882a39d518e7282630753 Ref: #13261 Closes #12746 --- man/man4/zfs.4 | 8 ++++---- module/zfs/dmu.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 657afc6169b6..3eeed8f439fa 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1581,12 +1581,12 @@ Allow no-operation writes. The occurrence of nopwrites will further depend on other pool properties .Pq i.a. the checksumming and compression algorithms . . -.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 0 Ns | Ns 1 Pq int +.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 1 Ns | Ns 0 Pq int Enable forcing TXG sync to find holes. 
-When enabled forces ZFS to act like prior versions when +When enabled forces ZFS to sync data when .Sy SEEK_HOLE No or Sy SEEK_DATA -flags are used, which, when a dnode is dirty, -causes TXGs to be synced so that this data can be found. +flags are used allowing holes in a file to be accurately reported. +When disabled holes will not be reported in recently dirtied files. . .It Sy zfs_pd_bytes_max Ns = Ns Sy 52428800 Ns B Po 50MB Pc Pq int The number of bytes which should be prefetched during a pool traversal, like diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 0c528f68ccf4..4e7127bd1bab 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -73,9 +73,13 @@ int zfs_nopwrite_enabled = 1; unsigned long zfs_per_txg_dirty_frees_percent = 5; /* - * Enable/disable forcing txg sync when dirty in dmu_offset_next. + * Enable/disable forcing txg sync when dirty checking for holes with lseek(). + * By default this is enabled to ensure accurate hole reporting, it can result + * in a significant performance penalty for lseek(SEEK_HOLE) heavy workloads. + * Disabling this option will result in holes never being reported in dirty + * files which is always safe. */ -int zfs_dmu_offset_next_sync = 0; +int zfs_dmu_offset_next_sync = 1; /* * Limit the amount we can prefetch with one call to this amount. This @@ -2110,8 +2114,8 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) * If the zfs_dmu_offset_next_sync module option is enabled * then strict hole reporting has been requested. Dirty * dnodes must be synced to disk to accurately report all - * holes. When disabled (the default) dirty dnodes are - * reported to not have any holes which is always safe. + * holes. When disabled dirty dnodes are reported to not + * have any holes which is always safe. 
* * When called by zfs_holey_common() the zp->z_rangelock * is held to prevent zfs_write() and mmap writeback from From 5a9994f5ae69448ad604e8184bd2ee8e437ffbca Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 11 Oct 2021 10:54:39 -0700 Subject: [PATCH 05/19] Export minimal zfs_refcount interfaces Lustre makes light use of the zfs_refcount interfaces which isn't a problem when using a non-debug build of OpenZFS. However, when debugging is enabled the required symbols are not exported. Reviewed-by: Olaf Faaland Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #12613 --- module/zfs/refcount.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index 354e021d9d26..35a379dded69 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -318,6 +318,14 @@ zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) return (B_TRUE); } +EXPORT_SYMBOL(zfs_refcount_create); +EXPORT_SYMBOL(zfs_refcount_destroy); +EXPORT_SYMBOL(zfs_refcount_is_zero); +EXPORT_SYMBOL(zfs_refcount_count); +EXPORT_SYMBOL(zfs_refcount_add); +EXPORT_SYMBOL(zfs_refcount_remove); +EXPORT_SYMBOL(zfs_refcount_held); + /* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, ,reference_tracking_enable, INT, ZMOD_RW, "Track reference holders to refcount_t objects"); From a5a28723bdd09903c55c4a6aa471e361392ff1b2 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Sat, 2 Apr 2022 15:10:55 -0400 Subject: [PATCH 06/19] FreeBSD: Use NDFREE_PNBUF if available NDF_ONLY_PNBUF has been removed from FreeBSD in favor of NDFREE_PNBUF. 
Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #13277 --- module/os/freebsd/zfs/zfs_vnops_os.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index b2cc3d063f9c..9e0de0f6f38a 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -97,6 +97,10 @@ VFS_SMR_DECLARE; +#if __FreeBSD_version < 1300103 +#define NDFREE_PNBUF(ndp) NDFREE((ndp), NDF_ONLY_PNBUF) +#endif + #if __FreeBSD_version >= 1300047 #define vm_page_wire_lock(pp) #define vm_page_wire_unlock(pp) @@ -5357,7 +5361,7 @@ zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname) #endif error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL); vp = nd.ni_vp; - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE_PNBUF(&nd); if (error != 0) return (error); @@ -5475,12 +5479,12 @@ zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) error = namei(&nd); vp = nd.ni_vp; if (error != 0) { - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE_PNBUF(&nd); return (error); } error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE_PNBUF(&nd); vput(nd.ni_dvp); if (vp == nd.ni_dvp) @@ -5605,7 +5609,7 @@ zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, NULL); vp = nd.ni_vp; - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE_PNBUF(&nd); if (error != 0) return (error); @@ -5760,7 +5764,7 @@ zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) #endif error = namei(&nd); vp = nd.ni_vp; - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE_PNBUF(&nd); if (error != 0) return (error); From 215a8255a97f32cb26881d6fb4f883b7608329d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 24 Mar 2022 15:22:53 +0100 Subject: [PATCH 07/19] Linux 5.18 compat: 4-argument bio_alloc() MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) became bio_alloc(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) passing NULL/0 continues previous behaviour Upstream-commit: 07888c665b405b1cd3577ddebfeb74f4717a84c4 ("block: pass a block_device and opf to bio_alloc") Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13251 --- config/kernel-bio.m4 | 37 +++++++++++++++++++++++++++++++++ module/os/linux/zfs/vdev_disk.c | 4 ++++ 2 files changed, 41 insertions(+) diff --git a/config/kernel-bio.m4 b/config/kernel-bio.m4 index d088d7023cb0..b5d25448155f 100644 --- a/config/kernel-bio.m4 +++ b/config/kernel-bio.m4 @@ -474,6 +474,41 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [ ]) ]) +dnl # +dnl # Linux 5.18 API +dnl # +dnl # In 07888c665b405b1cd3577ddebfeb74f4717a84c4 ("block: pass a block_device and opf to bio_alloc") +dnl # bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) +dnl # became +dnl # bio_alloc(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) +dnl # however +dnl # > NULL/0 can be passed, both for the +dnl # > passthrough case on a raw request_queue and to temporarily avoid +dnl # > refactoring some nasty code. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG], [ + ZFS_LINUX_TEST_SRC([bio_alloc_4arg], [ + #include + ],[ + gfp_t gfp_mask = 0; + unsigned short nr_iovecs = 0; + struct block_device *bdev = NULL; + unsigned int opf = 0; + + struct bio *__attribute__((unused)) allocated = bio_alloc(bdev, nr_iovecs, opf, gfp_mask); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [ + AC_MSG_CHECKING([for 4-argument bio_alloc()]) + ZFS_LINUX_TEST_RESULT([bio_alloc_4arg],[ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_BIO_ALLOC_4ARG], 1, [bio_alloc() takes 4 arguments]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [ ZFS_AC_KERNEL_SRC_REQ ZFS_AC_KERNEL_SRC_BIO_OPS @@ -488,6 +523,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [ ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER + ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG ]) AC_DEFUN([ZFS_AC_KERNEL_BIO], [ @@ -512,4 +548,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO], [ ZFS_AC_KERNEL_BIO_BDEV_DISK ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID ZFS_AC_KERNEL_BLK_CGROUP_HEADER + ZFS_AC_KERNEL_BIO_ALLOC_4ARG ]) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 581a790865b7..23bb7c11d2ec 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -563,6 +563,10 @@ vdev_submit_bio(struct bio *bio) current->bio_list = bio_list; } +#ifdef HAVE_BIO_ALLOC_4ARG +#define bio_alloc(gfp_mask, nr_iovecs) bio_alloc(NULL, nr_iovecs, 0, gfp_mask) +#endif + static int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, size_t io_size, uint64_t io_offset, int rw, int flags) From 9f7f70450791822518e6e9dc515a4ded47e74c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 24 Mar 2022 16:33:40 +0100 Subject: [PATCH 08/19] Linux 5.18 compat: replace genhd.h with blkdev.h includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blkdev.h includes genhd.h since dawn of 
upstream git, so this is globally safe Upstream-commit: 322cbb50de711814c42fb088f6d31901502c711a ("block: remove genhd.h") Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13251 --- config/kernel-add-disk.m4 | 5 ++--- config/kernel-revalidate-disk-size.m4 | 4 ++-- module/os/linux/zfs/zvol_os.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/config/kernel-add-disk.m4 b/config/kernel-add-disk.m4 index 5d1779eb4328..44a8a5fd25b6 100644 --- a/config/kernel-add-disk.m4 +++ b/config/kernel-add-disk.m4 @@ -3,16 +3,15 @@ dnl # 5.16 API change dnl # add_disk grew a must-check return code dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [ - ZFS_LINUX_TEST_SRC([add_disk_ret], [ - #include + #include ], [ struct gendisk *disk = NULL; int err = add_disk(disk); err = err; ]) - ]) + AC_DEFUN([ZFS_AC_KERNEL_ADD_DISK], [ AC_MSG_CHECKING([whether add_disk() returns int]) ZFS_LINUX_TEST_RESULT([add_disk_ret], diff --git a/config/kernel-revalidate-disk-size.m4 b/config/kernel-revalidate-disk-size.m4 index a7d0cb3cdab4..13cb92a174e3 100644 --- a/config/kernel-revalidate-disk-size.m4 +++ b/config/kernel-revalidate-disk-size.m4 @@ -8,14 +8,14 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_REVALIDATE_DISK], [ ZFS_LINUX_TEST_SRC([revalidate_disk_size], [ - #include + #include ], [ struct gendisk *disk = NULL; (void) revalidate_disk_size(disk, false); ]) ZFS_LINUX_TEST_SRC([revalidate_disk], [ - #include + #include ], [ struct gendisk *disk = NULL; (void) revalidate_disk(disk); diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 7a979eb91e7f..b1321df8da4b 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -906,7 +906,7 @@ zvol_alloc(dev_t dev, const char *name) if (volmode == ZFS_VOLMODE_DEV) { /* * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set - * gendisk->minors = 1 as noted in include/linux/genhd.h. + * gendisk->minors = 1 as noted in include/linux/blkdev.h. 
* Also disable extended partition numbers (GENHD_FL_EXT_DEVT) * and suppresses partition scanning (GENHD_FL_NO_PART_SCAN) * setting gendisk->flags accordingly. From 10a9f5fc47acc6c9d3475023001264ca5fb741a6 Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Fri, 1 Apr 2022 23:47:36 +0200 Subject: [PATCH 09/19] Linux 5.18 compat: blkg_tryget is moved to private headers Reviewed-by: Brian Behlendorf Signed-off-by: Riccardo Schirone Closes #13278 --- module/os/linux/zfs/vdev_disk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 23bb7c11d2ec..2708535b3b88 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -467,8 +467,11 @@ vdev_submit_bio_impl(struct bio *bio) * blkg_tryget() to use rcu_read_lock() instead of rcu_read_lock_sched(). * As a side effect the function was converted to GPL-only. Define our * own version when needed which uses rcu_read_lock_sched(). + * + * The Linux 5.17 kernel split linux/blk-cgroup.h into a private and a public + * part, moving blkg_tryget into the private one. Define our own version. */ -#if defined(HAVE_BLKG_TRYGET_GPL_ONLY) +#if defined(HAVE_BLKG_TRYGET_GPL_ONLY) || !defined(HAVE_BLKG_TRYGET) static inline bool vdev_blkg_tryget(struct blkcg_gq *blkg) { @@ -493,7 +496,7 @@ vdev_blkg_tryget(struct blkcg_gq *blkg) return (rc); } -#elif defined(HAVE_BLKG_TRYGET) +#else #define vdev_blkg_tryget(bg) blkg_tryget(bg) #endif #ifdef HAVE_BIO_SET_DEV_MACRO From 35ddd8ee2e5fdbaa5b0d94c063bbc1a79ab12cb1 Mon Sep 17 00:00:00 2001 From: Riccardo Schirone Date: Sat, 2 Apr 2022 01:15:25 +0200 Subject: [PATCH 10/19] Linux 5.18 compat: use address_space_operations->readahead ->readpages was removed and replaced by ->readahead. Define zpl_readahead for kernels that don't have ->readpages. 
Reviewed-by: Brian Behlendorf Signed-off-by: Riccardo Schirone Closes #13278 --- config/kernel-readpages.m4 | 25 +++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zpl_file.c | 21 +++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 config/kernel-readpages.m4 diff --git a/config/kernel-readpages.m4 b/config/kernel-readpages.m4 new file mode 100644 index 000000000000..0bf67ffd933d --- /dev/null +++ b/config/kernel-readpages.m4 @@ -0,0 +1,25 @@ +dnl # +dnl # Linux 5.18 removes address_space_operations ->readpages in favour of +dnl # ->readahead +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READPAGES], [ + ZFS_LINUX_TEST_SRC([vfs_has_readpages], [ + #include + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .readpages = NULL, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_READPAGES], [ + AC_MSG_CHECKING([address_space_operations->readpages exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_readpages], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_READPAGES, 1, + [address_space_operations->readpages exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 3122e9dbaa94..7e546b58a77c 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SIGNAL_STOP ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE + ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT @@ -242,6 +243,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SIGNAL_STOP ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SET_SPECIAL_STATE + ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index f1241c443797..4965815ece17 100644 --- 
a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -647,12 +647,29 @@ zpl_readpage_filler(void *data, struct page *pp) * paging. For simplicity, the code relies on read_cache_pages() to * correctly lock each page for IO and call zpl_readpage(). */ +#ifdef HAVE_VFS_READPAGES static int zpl_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { return (read_cache_pages(mapping, pages, zpl_readpage_filler, NULL)); } +#else +static void +zpl_readahead(struct readahead_control *ractl) +{ + struct page *page; + + while ((page = readahead_page(ractl)) != NULL) { + int ret; + + ret = zpl_readpage_filler(NULL, page); + put_page(page); + if (ret) + break; + } +} +#endif static int zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) @@ -1027,7 +1044,11 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) const struct address_space_operations zpl_address_space_operations = { +#ifdef HAVE_VFS_READPAGES .readpages = zpl_readpages, +#else + .readahead = zpl_readahead, +#endif .readpage = zpl_readpage, .writepage = zpl_writepage, .writepages = zpl_writepages, From 1f4c79b1ce0b27b747149b2820fa5f6f051b7d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Mon, 4 Apr 2022 14:25:01 +0200 Subject: [PATCH 11/19] libzfs: sendrecv: always cancel progress thread in zfs_send_one() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in line with all the other uses of the progress thread Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #11560 Closes #13284 --- lib/libzfs/libzfs_sendrecv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index b67c9b30c84a..60050e22a31a 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2522,8 +2522,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, 
int fd, sendflags_t *flags, if (flags->progress) { void *status = NULL; - if (err != 0) - (void) pthread_cancel(ptid); + (void) pthread_cancel(ptid); (void) pthread_join(ptid, &status); int error = (int)(uintptr_t)status; if (error != 0 && status != PTHREAD_CANCELED) From ff23ef0c99a71e8697b4a39886b0f62b4ecf935d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 7 Apr 2022 04:32:27 +0200 Subject: [PATCH 12/19] libzfs: import: zpool_clear_label: actually fail if clearing l2arc header fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found with -Wunused-but-set-variable on Clang trunk Upstream-commit: a4e0cee1780cbd8f2cb9a263a0ed8d91dbe68b4a Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13304 --- lib/libzfs/libzfs_import.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 64fa31c67d0f..ddaa5de5db11 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -146,10 +146,10 @@ zpool_clear_label(int fd) struct stat64 statbuf; int l; vdev_label_t *label; - l2arc_dev_hdr_phys_t *l2dhdr; + l2arc_dev_hdr_phys_t *l2dhdr = NULL; uint64_t size; - int labels_cleared = 0, header_cleared = 0; - boolean_t clear_l2arc_header = B_FALSE; + int labels_cleared = 0; + boolean_t clear_l2arc_header = B_FALSE, header_cleared = B_FALSE; if (fstat64_blk(fd, &statbuf) == -1) return (0); @@ -219,13 +219,10 @@ zpool_clear_label(int fd) } /* Clear the L2ARC header. 
*/ - if (clear_l2arc_header) { - memset(l2dhdr, 0, sizeof (l2arc_dev_hdr_phys_t)); - if (pwrite64(fd, l2dhdr, sizeof (l2arc_dev_hdr_phys_t), - VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t)) { - header_cleared++; - } - } + if (clear_l2arc_header && + pwrite64(fd, l2dhdr, sizeof (l2arc_dev_hdr_phys_t), + VDEV_LABEL_START_SIZE) == sizeof (l2arc_dev_hdr_phys_t)) + header_cleared = B_TRUE; free(label); free(l2dhdr); @@ -233,6 +230,9 @@ zpool_clear_label(int fd) if (labels_cleared == 0) return (-1); + if (clear_l2arc_header && !header_cleared) + return (-1); + return (0); } From e9cd90f6e5953df79ae5db84e3b23f339d6098f7 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 7 Apr 2022 17:11:00 -0400 Subject: [PATCH 13/19] FreeBSD: Parameterize ZFS_ENTER/ZFS_VERIFY_ZP with an error code For legacy reasons, a couple of VOPs have to return error numbers that don't come from the usual errno namespace. To handle the cases where ZFS_ENTER or ZFS_VERIFY_ZP fail, we need to be able to override the default error return value of EIO. Extend the macros to permit this.
Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Mark Johnston Closes #13311 --- include/os/freebsd/zfs/sys/zfs_znode_impl.h | 32 ++++++++++++--------- module/os/freebsd/zfs/zfs_vnops_os.c | 2 +- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h index edb28d041a0c..3d93525b45ab 100644 --- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h +++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h @@ -123,25 +123,29 @@ extern minor_t zfsdev_minor_alloc(void); #define zn_rlimit_fsize(zp, uio) \ vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio)) +#define ZFS_ENTER_ERROR(zfsvfs, error) do { \ + ZFS_TEARDOWN_ENTER_READ((zfsvfs), FTAG); \ + if (__predict_false((zfsvfs)->z_unmounted)) { \ + ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG); \ + return (error); \ + } \ +} while (0) + /* Called on entry to each ZFS vnode and vfs operation */ -#define ZFS_ENTER(zfsvfs) \ - { \ - ZFS_TEARDOWN_ENTER_READ((zfsvfs), FTAG); \ - if (__predict_false((zfsvfs)->z_unmounted)) { \ - ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG); \ - return (EIO); \ - } \ - } +#define ZFS_ENTER(zfsvfs) ZFS_ENTER_ERROR(zfsvfs, EIO) /* Must be called before exiting the vop */ -#define ZFS_EXIT(zfsvfs) ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG) +#define ZFS_EXIT(zfsvfs) ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG) + +#define ZFS_VERIFY_ZP_ERROR(zp, error) do { \ + if (__predict_false((zp)->z_sa_hdl == NULL)) { \ + ZFS_EXIT((zp)->z_zfsvfs); \ + return (error); \ + } \ +} while (0) /* Verifies the znode is valid */ -#define ZFS_VERIFY_ZP(zp) \ - if (__predict_false((zp)->z_sa_hdl == NULL)) { \ - ZFS_EXIT((zp)->z_zfsvfs); \ - return (EIO); \ - } \ +#define ZFS_VERIFY_ZP(zp) ZFS_VERIFY_ZP_ERROR(zp, EIO) /* * Macros for dealing with dmu_buf_hold diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 9e0de0f6f38a..75a4fd183310 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ 
b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -5434,7 +5434,7 @@ zfs_getextattr(struct vop_getextattr_args *ap) error = ENOENT; ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp) + ZFS_VERIFY_ZP(zp); rw_enter(&zp->z_xattr_lock, RW_READER); if (zfsvfs->z_use_sa && zp->z_is_sa) error = zfs_getextattr_sa(ap, attrname); From b7546f92eaeb295cf0302662773792a2a8d359a5 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 7 Apr 2022 17:13:18 -0400 Subject: [PATCH 14/19] FreeBSD: Return Mach error codes from VOP_(GET|PUT)PAGES FreeBSD's memory management system uses its own error numbers and gets confused when these VOPs return EIO. Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Reported-by: Peter Holm Signed-off-by: Mark Johnston Closes #13311 --- module/os/freebsd/zfs/zfs_vnops_os.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 75a4fd183310..d273e70981db 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4062,8 +4062,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int pgsin_b, pgsin_a; int error; - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); + ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error); + ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error); start = IDX_TO_OFF(ma[0]->pindex); end = IDX_TO_OFF(ma[count - 1]->pindex + 1); @@ -4187,19 +4187,18 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, int err; int i; - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - object = vp->v_object; - pcount = btoc(len); - ncount = pcount; - KASSERT(ma[0]->object == object, ("mismatching object")); KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); + pcount = btoc(len); + ncount = pcount; for (i = 0; i < pcount; i++) rtvals[i] = zfs_vm_pagerret_error; + ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error); + ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error); + off = IDX_TO_OFF(ma[0]->pindex); 
blksz = zp->z_blksz; lo_off = rounddown(off, blksz); From aa1c3c1d1d829a877e56c8da6390d40f7656e8bb Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 19 Apr 2022 10:38:04 -0700 Subject: [PATCH 15/19] Linux 5.17 compat: GENHD_FL_EXT_DEVT / GENHD_FL_NO_PART_SCAN As of the 5.17 kernel the GENHD_FL_EXT_DEVT flag has been removed and the GENHD_FL_NO_PART_SCAN flag renamed GENHD_FL_NO_PART. Update zvol_alloc() to set GENHD_FL_NO_PART for the newer kernels which is sufficient. The behavior for prior kernels remains unchanged. 1ebe2e5f ("block: remove GENHD_FL_EXT_DEVT") 46e7eac6 ("block: rename GENHD_FL_NO_PART_SCAN to GENHD_FL_NO_PART") Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13294 Closes #13297 --- config/kernel-genhd-flags.m4 | 58 +++++++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zvol_os.c | 21 +++++-------- 3 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 config/kernel-genhd-flags.m4 diff --git a/config/kernel-genhd-flags.m4 b/config/kernel-genhd-flags.m4 new file mode 100644 index 000000000000..af6a8a086bc9 --- /dev/null +++ b/config/kernel-genhd-flags.m4 @@ -0,0 +1,58 @@ +dnl # +dnl # 5.17 API change, +dnl # +dnl # GENHD_FL_EXT_DEVT flag removed +dnl # GENHD_FL_NO_PART_SCAN renamed GENHD_FL_NO_PART +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENHD_FLAGS], [ + + ZFS_LINUX_TEST_SRC([genhd_fl_ext_devt], [ + #include + ], [ + int flags __attribute__ ((unused)) = GENHD_FL_EXT_DEVT; + ]) + + ZFS_LINUX_TEST_SRC([genhd_fl_no_part], [ + #include + ], [ + int flags __attribute__ ((unused)) = GENHD_FL_NO_PART; + ]) + + ZFS_LINUX_TEST_SRC([genhd_fl_no_part_scan], [ + #include + ], [ + int flags __attribute__ ((unused)) = GENHD_FL_NO_PART_SCAN; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [ + + AC_MSG_CHECKING([whether GENHD_FL_EXT_DEVT flag is available]) + ZFS_LINUX_TEST_RESULT([genhd_fl_ext_devt], [ + AC_MSG_RESULT(yes) + AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, GENHD_FL_EXT_DEVT, + [GENHD_FL_EXT_DEVT 
flag is available]) + ], [ + AC_MSG_RESULT(no) + AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, 0, + [GENHD_FL_EXT_DEVT flag is not available]) + ]) + + AC_MSG_CHECKING([whether GENHD_FL_NO_PART flag is available]) + ZFS_LINUX_TEST_RESULT([genhd_fl_no_part], [ + AC_MSG_RESULT(yes) + AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART, + [GENHD_FL_NO_PART flag is available]) + ], [ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether GENHD_FL_NO_PART_SCAN flag is available]) + ZFS_LINUX_TEST_RESULT([genhd_fl_no_part_scan], [ + AC_MSG_RESULT(yes) + AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART_SCAN, + [GENHD_FL_NO_PART_SCAN flag is available]) + ], [ + ZFS_LINUX_TEST_ERROR([GENHD_FL_NO_PART|GENHD_FL_NO_PART_SCAN]) + ]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 7e546b58a77c..0ef7c7d0caa7 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -61,6 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_BIO ZFS_AC_KERNEL_SRC_BLKDEV ZFS_AC_KERNEL_SRC_BLK_QUEUE + ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL @@ -172,6 +173,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_BIO ZFS_AC_KERNEL_BLKDEV ZFS_AC_KERNEL_BLK_QUEUE + ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index b1321df8da4b..5c509a06a4fb 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -903,22 +903,17 @@ zvol_alloc(dev_t dev, const char *name) zso->zvo_disk->major = zvol_major; zso->zvo_disk->events = DISK_EVENT_MEDIA_CHANGE; + /* + * Setting ZFS_VOLMODE_DEV disables partitioning on ZVOL devices. + * This is accomplished by limiting the number of minors for the + * device to one and explicitly disabling partition scanning. 
+ */ if (volmode == ZFS_VOLMODE_DEV) { - /* - * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set - * gendisk->minors = 1 as noted in include/linux/blkdev.h. - * Also disable extended partition numbers (GENHD_FL_EXT_DEVT) - * and suppresses partition scanning (GENHD_FL_NO_PART_SCAN) - * setting gendisk->flags accordingly. - */ zso->zvo_disk->minors = 1; -#if defined(GENHD_FL_EXT_DEVT) - zso->zvo_disk->flags &= ~GENHD_FL_EXT_DEVT; -#endif -#if defined(GENHD_FL_NO_PART_SCAN) - zso->zvo_disk->flags |= GENHD_FL_NO_PART_SCAN; -#endif + zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT; + zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART; } + zso->zvo_disk->first_minor = (dev & MINORMASK); zso->zvo_disk->fops = &zvol_ops; zso->zvo_disk->private_data = zv; From 361dc138b1bdd6b7ec32fbd74b775c55c595f45f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 16 Apr 2022 16:07:04 +0200 Subject: [PATCH 16/19] Document zfs inherit -S's interaction with noninheritable properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Damian Szuberski Signed-off-by: Ahelenia Ziemiańska Upstream-commit: 92295af8004b07a58e0860d66dd565143486a757 Closes #11894 Closes #13335 --- man/man8/zfs-set.8 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/man8/zfs-set.8 b/man/man8/zfs-set.8 index a3588cc26638..ccd90f0917ea 100644 --- a/man/man8/zfs-set.8 +++ b/man/man8/zfs-set.8 @@ -170,8 +170,9 @@ inherited. .It Fl r Recursively inherit the given property for all children. .It Fl S -Revert the property to the received value if one exists; otherwise operate as -if the +Revert the property to the received value, if one exists; +otherwise, for non-inheritable properties, to the default; +otherwise, operate as if the .Fl S option was not specified. 
.El From c220771a47e4206fb43e6849957657c9504b1b14 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Wed, 20 Apr 2022 19:07:03 -0400 Subject: [PATCH 17/19] Corrected oversight in ZERO_RANGE behavior It turns out, no, in fact, ZERO_RANGE and PUNCH_HOLE do have differing semantics in some ways - in particular, one requires KEEP_SIZE, and the other does not. Also added a zero-range test to catch this, corrected a flaw that made the punch-hole test succeed vacuously, and a typo in file_write. Reviewed-by: Brian Behlendorf Signed-off-by: Rich Ercolani Closes #13329 Closes #13338 --- module/os/linux/zfs/zpl_file.c | 10 +- tests/runfiles/linux.run | 2 +- tests/zfs-tests/cmd/file_write/file_write.c | 2 +- tests/zfs-tests/include/libtest.shlib | 16 +++ .../tests/functional/fallocate/Makefile.am | 3 +- .../fallocate/fallocate_punch-hole.ksh | 35 ++++-- .../fallocate/fallocate_zero-range.ksh | 119 ++++++++++++++++++ .../tests/functional/fallocate/setup.ksh | 5 +- 8 files changed, 171 insertions(+), 21 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 4965815ece17..1ed41b9c4c20 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -781,11 +781,13 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) if (mode & (test_mode)) { flock64_t bf; - if (offset > olen) - goto out_unmark; + if (mode & FALLOC_FL_KEEP_SIZE) { + if (offset > olen) + goto out_unmark; - if (offset + len > olen) - len = olen - offset; + if (offset + len > olen) + len = olen - offset; + } bf.l_type = F_WRLCK; bf.l_whence = SEEK_SET; bf.l_start = offset; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index c01e1e3c4d53..94c1cbbc3f9f 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -94,7 +94,7 @@ tests = ['events_001_pos', 'events_002_pos', 
'zed_rc_filter', 'zed_fd_spill'] tags = ['functional', 'events'] [tests/functional/fallocate:Linux] -tests = ['fallocate_prealloc'] +tests = ['fallocate_prealloc', 'fallocate_zero-range'] tags = ['functional', 'fallocate'] [tests/functional/fault:Linux] diff --git a/tests/zfs-tests/cmd/file_write/file_write.c b/tests/zfs-tests/cmd/file_write/file_write.c index 60893c34fbc9..9d2e71b679eb 100644 --- a/tests/zfs-tests/cmd/file_write/file_write.c +++ b/tests/zfs-tests/cmd/file_write/file_write.c @@ -251,7 +251,7 @@ usage(char *prog) "\t[-s offset] [-c write_count] [-d data]\n\n" "Where [data] equal to zero causes chars " "0->%d to be repeated throughout, or [data]\n" - "equal to 'R' for psudorandom data.\n", + "equal to 'R' for pseudorandom data.\n", prog, DATA_RANGE); exit(1); diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index dd43b02a6868..94ab7ffd20f7 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -4236,6 +4236,22 @@ function punch_hole # offset length file esac } +function zero_range # offset length file +{ + typeset offset=$1 + typeset length=$2 + typeset file=$3 + + case "$UNAME" in + Linux) + fallocate --zero-range --offset $offset --length $length "$file" + ;; + *) + false + ;; + esac +} + # # Wait for the specified arcstat to reach non-zero quiescence. 
# If echo is 1 echo the value after reaching quiescence, otherwise diff --git a/tests/zfs-tests/tests/functional/fallocate/Makefile.am b/tests/zfs-tests/tests/functional/fallocate/Makefile.am index 5ff366d2482c..86364d7895dd 100644 --- a/tests/zfs-tests/tests/functional/fallocate/Makefile.am +++ b/tests/zfs-tests/tests/functional/fallocate/Makefile.am @@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ fallocate_prealloc.ksh \ - fallocate_punch-hole.ksh + fallocate_punch-hole.ksh \ + fallocate_zero-range.ksh diff --git a/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh b/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh index ed83561bd556..92f4552f5bd7 100755 --- a/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh +++ b/tests/zfs-tests/tests/functional/fallocate/fallocate_punch-hole.ksh @@ -60,13 +60,17 @@ function cleanup [[ -e $TESTDIR ]] && log_must rm -f $FILE } -function check_disk_size +function check_reported_size { typeset expected_size=$1 - disk_size=$(du $TESTDIR/file | awk '{print $1}') - if [ $disk_size -ne $expected_size ]; then - log_fail "Incorrect size: $disk_size != $expected_size" + if ! [ -e "${FILE}" ]; then + log_fail "$FILE does not exist" + fi + + reported_size=$(du "${FILE}" | awk '{print $1}') + if [ "$reported_size" != "$expected_size" ]; then + log_fail "Incorrect reported size: $reported_size != $expected_size" fi } @@ -74,9 +78,9 @@ function check_apparent_size { typeset expected_size=$1 - apparent_size=$(stat_size) - if [ $apparent_size -ne $expected_size ]; then - log_fail "Incorrect size: $apparent_size != $expected_size" + apparent_size=$(stat_size "${FILE}") + if [ "$apparent_size" != "$expected_size" ]; then + log_fail "Incorrect apparent size: $apparent_size != $expected_size" fi } @@ -86,25 +90,30 @@ log_onexit cleanup # Create a dense file and check it is the correct size. 
log_must file_write -o create -f $FILE -b $BLKSZ -c 8 -log_must check_disk_size $((131072 * 8)) +sync_pool $TESTPOOL +log_must check_reported_size 1027 # Punch a hole for the first full block. log_must punch_hole 0 $BLKSZ $FILE -log_must check_disk_size $((131072 * 7)) +sync_pool $TESTPOOL +log_must check_reported_size 899 # Partially punch a hole in the second block. log_must punch_hole $BLKSZ $((BLKSZ / 2)) $FILE -log_must check_disk_size $((131072 * 7)) +sync_pool $TESTPOOL +log_must check_reported_size 899 -# Punch a hole which overlaps the third and forth block. +# Punch a hole which overlaps the third and fourth block. log_must punch_hole $(((BLKSZ * 2) + (BLKSZ / 2))) $((BLKSZ)) $FILE -log_must check_disk_size $((131072 * 7)) +sync_pool $TESTPOOL +log_must check_reported_size 899 # Punch a hole from the fifth block past the end of file. The apparent # file size should not change since --keep-size is implied. apparent_size=$(stat_size $FILE) log_must punch_hole $((BLKSZ * 4)) $((BLKSZ * 10)) $FILE -log_must check_disk_size $((131072 * 4)) +sync_pool $TESTPOOL +log_must check_reported_size 387 log_must check_apparent_size $apparent_size log_pass "Ensure holes can be punched in files making them sparse" diff --git a/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh b/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh new file mode 100755 index 000000000000..e907b0f5d4c4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fallocate/fallocate_zero-range.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2021 by The FreeBSD Foundation. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test FALLOC_FL_ZERO_RANGE functionality +# +# STRATEGY: +# 1. Create a dense file +# 2. Zero various ranges in the file and verify the result. +# + +verify_runnable "global" + +if is_freebsd; then + log_unsupported "FreeBSD does not implement an analogue to ZERO_RANGE." +fi + +FILE=$TESTDIR/$TESTFILE0 +BLKSZ=$(get_prop recordsize $TESTPOOL) + +function cleanup +{ + [[ -e $TESTDIR ]] && log_must rm -f $FILE +} + +# Helpfully, this function expects kilobytes, and check_apparent_size expects bytes. +function check_reported_size +{ + typeset expected_size=$1 + + if ! [ -e "${FILE}" ]; then + log_fail "$FILE does not exist" + fi + + reported_size=$(du "${FILE}" | awk '{print $1}') + if [ "$reported_size" != "$expected_size" ]; then + log_fail "Incorrect reported size: $reported_size != $expected_size" + fi +} + +function check_apparent_size +{ + typeset expected_size=$1 + + apparent_size=$(stat_size "${FILE}") + if [ "$apparent_size" != "$expected_size" ]; then + log_fail "Incorrect apparent size: $apparent_size != $expected_size" + fi +} + +log_assert "Ensure ranges can be zeroed in files" + +log_onexit cleanup + +# Create a dense file and check it is the correct size. +log_must file_write -o create -f $FILE -b $BLKSZ -c 8 +sync_pool $TESTPOOL +log_must check_reported_size 1027 + +# Zero a range covering the first full block. 
+log_must zero_range 0 $BLKSZ $FILE +sync_pool $TESTPOOL +log_must check_reported_size 899 + +# Partially zero a range in the second block. +log_must zero_range $BLKSZ $((BLKSZ / 2)) $FILE +sync_pool $TESTPOOL +log_must check_reported_size 899 + +# Zero range which overlaps the third and fourth block. +log_must zero_range $(((BLKSZ * 2) + (BLKSZ / 2))) $((BLKSZ)) $FILE +sync_pool $TESTPOOL +log_must check_reported_size 899 + +# Zero range from the fifth block past the end of file, with --keep-size. +# The apparent file size must not change, since we did specify --keep-size. +apparent_size=$(stat_size $FILE) +log_must fallocate --keep-size --zero-range --offset $((BLKSZ * 4)) --length $((BLKSZ * 10)) "$FILE" +sync_pool $TESTPOOL +log_must check_reported_size 387 +log_must check_apparent_size $apparent_size + +# Zero range from the fifth block past the end of file. The apparent +# file size should change since --keep-size is not implied, unlike +# with PUNCH_HOLE. +apparent_size=$(stat_size $FILE) +log_must zero_range $((BLKSZ * 4)) $((BLKSZ * 10)) $FILE +sync_pool $TESTPOOL +log_must check_reported_size 387 +log_must check_apparent_size $((BLKSZ * 14)) + +log_pass "Ensure ranges can be zeroed in files" diff --git a/tests/zfs-tests/tests/functional/fallocate/setup.ksh b/tests/zfs-tests/tests/functional/fallocate/setup.ksh index 32334d396865..586ac026aa43 100755 --- a/tests/zfs-tests/tests/functional/fallocate/setup.ksh +++ b/tests/zfs-tests/tests/functional/fallocate/setup.ksh @@ -26,4 +26,7 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} -default_setup $DISK +default_setup_noexit $DISK +log_must zfs set compression=off $TESTPOOL +log_pass + From 972637dc06a04432dc58e240b8ef3e9f538b98bb Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 18 Apr 2022 20:25:49 -0400 Subject: [PATCH 18/19] FreeBSD: Fix translation from ABD to physical pages. 
In the hypothetical case of a non-linear ABD with a single segment whose size is a multiple of the page size but which is not aligned to it, vdev_geom_fill_unmap_cb() could fill one page less into the bio_ma array. I am not sure it is exploitable, but better to be safe than sorry. Reported-by: Mark Johnston Signed-off-by: Alexander Motin (cherry picked from commit 5352f85cddce44e82fb1c4caec3b333e3666d7fd) --- module/os/freebsd/zfs/vdev_geom.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/module/os/freebsd/zfs/vdev_geom.c b/module/os/freebsd/zfs/vdev_geom.c index 2ef4811a8a4e..5447eb922062 100644 --- a/module/os/freebsd/zfs/vdev_geom.c +++ b/module/os/freebsd/zfs/vdev_geom.c @@ -1132,8 +1132,12 @@ vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv) vm_offset_t addr = (vm_offset_t)buf; vm_offset_t end = addr + len; - if (bp->bio_ma_n == 0) + if (bp->bio_ma_n == 0) { bp->bio_ma_offset = addr & PAGE_MASK; + addr &= ~PAGE_MASK; + } else { + ASSERT0(P2PHASE(addr, PAGE_SIZE)); + } do { bp->bio_ma[bp->bio_ma_n++] = PHYS_TO_VM_PAGE(pmap_kextract(addr)); From 642426095ae165ce64896c11ec52cab3ec3073ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 21 Apr 2022 16:27:15 +0200 Subject: [PATCH 19/19] Linux 5.18 compat: kobj_type.default_attrs replaced with default_groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream-commit: cdb4f26a63c391317e335e6e683a614358e70aeb ("kobject: kobj_type: remove default_attrs") Upstream-commit: 0cdda2edb3f312d56518934811960ad57564c1bb Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13357 --- config/kernel-sysfs.m4 | 37 +++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zfs_sysfs.c | 39 +++++++++++++++++++-------------- 3 files changed, 61 insertions(+), 17 deletions(-) create mode 100644 config/kernel-sysfs.m4 diff --git a/config/kernel-sysfs.m4 b/config/kernel-sysfs.m4 new file mode 100644 index 000000000000..668def5fe6bf ---
/dev/null +++ b/config/kernel-sysfs.m4 @@ -0,0 +1,37 @@ +dnl # +dnl # Linux 5.2/5.18 API +dnl # +dnl # In cdb4f26a63c391317e335e6e683a614358e70aeb ("kobject: kobj_type: remove default_attrs") +dnl # struct kobj_type.default_attrs +dnl # was finally removed in favour of +dnl # struct kobj_type.default_groups +dnl # +dnl # This was added in aa30f47cf666111f6bbfd15f290a27e8a7b9d854 ("kobject: Add support for default attribute groups to kobj_type"), +dnl # if both are present (5.2-5.17), we prefer default_groups; they're otherwise equivalent +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS], [ + ZFS_LINUX_TEST_SRC([sysfs_default_groups], [ + #include + ],[ + struct kobj_type __attribute__ ((unused)) kt = { + .default_groups = (const struct attribute_group **)NULL }; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS], [ + AC_MSG_CHECKING([for struct kobj_type.default_groups]) + ZFS_LINUX_TEST_RESULT([sysfs_default_groups],[ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_SYSFS_DEFAULT_GROUPS], 1, [struct kobj_type has default_groups]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS], [ + ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS +]) + +AC_DEFUN([ZFS_AC_KERNEL_SYSFS], [ + ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 0ef7c7d0caa7..bb3fe2ebdd14 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS ZFS_AC_KERNEL_SRC_SIGNAL_STOP ZFS_AC_KERNEL_SRC_SIGINFO + ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS @@ -244,6 +245,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_BIO_MAX_SEGS ZFS_AC_KERNEL_SIGNAL_STOP ZFS_AC_KERNEL_SIGINFO + ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_SET_SPECIAL_STATE ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS diff --git a/module/os/linux/zfs/zfs_sysfs.c 
b/module/os/linux/zfs/zfs_sysfs.c index fb7c68987360..e73b34a2f37a 100644 --- a/module/os/linux/zfs/zfs_sysfs.c +++ b/module/os/linux/zfs/zfs_sysfs.c @@ -65,16 +65,15 @@ /* * A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs' */ -struct zfs_mod_kobj; typedef struct zfs_mod_kobj zfs_mod_kobj_t; - struct zfs_mod_kobj { struct kobject zko_kobj; struct kobj_type zko_kobj_type; struct sysfs_ops zko_sysfs_ops; size_t zko_attr_count; struct attribute *zko_attr_list; /* allocated */ - struct attribute **zko_default_attrs; /* allocated */ + struct attribute_group zko_default_group; /* .attrs allocated */ + const struct attribute_group *zko_default_groups[2]; size_t zko_child_count; zfs_mod_kobj_t *zko_children; /* allocated */ }; @@ -126,10 +125,10 @@ zfs_kobj_release(struct kobject *kobj) zkobj->zko_attr_list = NULL; } - if (zkobj->zko_default_attrs != NULL) { - kmem_free(zkobj->zko_default_attrs, + if (zkobj->zko_default_group.attrs != NULL) { + kmem_free(zkobj->zko_default_group.attrs, DEFAULT_ATTR_SIZE(zkobj->zko_attr_count)); - zkobj->zko_default_attrs = NULL; + zkobj->zko_default_group.attrs = NULL; } if (zkobj->zko_child_count != 0) { @@ -153,11 +152,12 @@ zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) { VERIFY3U(attr_num, <, zkobj->zko_attr_count); ASSERT(zkobj->zko_attr_list); - ASSERT(zkobj->zko_default_attrs); + ASSERT(zkobj->zko_default_group.attrs); zkobj->zko_attr_list[attr_num].name = attr_name; zkobj->zko_attr_list[attr_num].mode = 0444; - zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num]; + zkobj->zko_default_group.attrs[attr_num] = + &zkobj->zko_attr_list[attr_num]; sysfs_attr_init(&zkobj->zko_attr_list[attr_num]); } @@ -175,9 +175,9 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt, return (ENOMEM); } /* this will always have at least one slot for NULL termination */ - zkobj->zko_default_attrs = kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt), - KM_SLEEP); - if 
(zkobj->zko_default_attrs == NULL) { + zkobj->zko_default_group.attrs = + kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt), KM_SLEEP); + if (zkobj->zko_default_group.attrs == NULL) { if (zkobj->zko_attr_list != NULL) { kmem_free(zkobj->zko_attr_list, ATTR_TABLE_SIZE(attr_cnt)); @@ -185,14 +185,19 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt, return (ENOMEM); } zkobj->zko_attr_count = attr_cnt; - zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_attrs; + zkobj->zko_default_groups[0] = &zkobj->zko_default_group; +#ifdef HAVE_SYSFS_DEFAULT_GROUPS + zkobj->zko_kobj_type.default_groups = zkobj->zko_default_groups; +#else + zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_group.attrs; +#endif if (child_cnt > 0) { zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt), KM_SLEEP); if (zkobj->zko_children == NULL) { - if (zkobj->zko_default_attrs != NULL) { - kmem_free(zkobj->zko_default_attrs, + if (zkobj->zko_default_group.attrs != NULL) { + kmem_free(zkobj->zko_default_group.attrs, DEFAULT_ATTR_SIZE(attr_cnt)); } if (zkobj->zko_attr_list != NULL) { @@ -214,9 +219,9 @@ zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt, static int zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name) { - /* zko_default_attrs must be NULL terminated */ - ASSERT(zkobj->zko_default_attrs != NULL); - ASSERT(zkobj->zko_default_attrs[zkobj->zko_attr_count] == NULL); + /* zko_default_group.attrs must be NULL terminated */ + ASSERT(zkobj->zko_default_group.attrs != NULL); + ASSERT(zkobj->zko_default_group.attrs[zkobj->zko_attr_count] == NULL); kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type); return (kobject_add(&zkobj->zko_kobj, parent, name));