diff --git a/config/kernel-blk-queue-unplug.m4 b/config/kernel-blk-queue-unplug.m4 new file mode 100644 index 000000000000..45cc2322a5e4 --- /dev/null +++ b/config/kernel-blk-queue-unplug.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 2.6.32-2.6.35 API - The BIO_RW_UNPLUG enum can be used as a hint +dnl # to unplug the queue. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG], [ + AC_MSG_CHECKING([whether the BIO_RW_UNPLUG enum is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + extern enum bio_rw_flags rw; + + rw = BIO_RW_UNPLUG; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG, 1, + [BIO_RW_UNPLUG is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index ef8f7cd789b6..a60293192db2 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -33,6 +33,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BIO_RW_UNPLUG ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_GET_GENDISK ZFS_AC_KERNEL_DISCARD_GRANULARITY diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h index cf8028d2aca5..ff7cd217bb5a 100644 --- a/include/sys/vdev_disk.h +++ b/include/sys/vdev_disk.h @@ -37,9 +37,11 @@ typedef struct vdev_disk { struct block_device *vd_bdev; } vdev_disk_t; +#ifndef __linux__ extern int vdev_disk_physio(struct block_device *, caddr_t, size_t, uint64_t, int, int); extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); +#endif #endif /* _KERNEL */ #endif /* _SYS_VDEV_DISK_H */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9c9d5a542009..4fde89a94f71 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3863,7 +3863,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, return (0); } -#ifdef _KERNEL +#if defined(_KERNEL) && 
!defined(__linux__) /* * Get the root pool information from the root disk, then import the root pool * during the system boot up time. @@ -4066,7 +4066,7 @@ spa_import_rootpool(char *devpath, char *devid) return (error); } -#endif +#endif /* defined(_KERNEL) && !defined(__linux__) */ /* * Import a non-root pool into the system. @@ -6781,7 +6781,6 @@ EXPORT_SYMBOL(spa_open); EXPORT_SYMBOL(spa_open_rewind); EXPORT_SYMBOL(spa_get_stats); EXPORT_SYMBOL(spa_create); -EXPORT_SYMBOL(spa_import_rootpool); EXPORT_SYMBOL(spa_import); EXPORT_SYMBOL(spa_tryimport); EXPORT_SYMBOL(spa_destroy); diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 2f06e721bed9..5ec19870c9d2 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -41,10 +41,8 @@ static void *zfs_vdev_holder = VDEV_HOLDER; * Virtual device vector for disks. */ typedef struct dio_request { - struct completion dr_comp; /* Completion for sync IO */ zio_t *dr_zio; /* Parent ZIO */ atomic_t dr_ref; /* References */ - int dr_wait; /* Wait for IO */ int dr_error; /* Bio error */ int dr_bio_count; /* Count of bio's */ struct bio *dr_bio[0]; /* Attached bio's */ @@ -363,7 +361,6 @@ vdev_disk_dio_alloc(int bio_count) dr = kmem_zalloc(sizeof (dio_request_t) + sizeof (struct bio *) * bio_count, KM_SLEEP); if (dr) { - init_completion(&dr->dr_comp); atomic_set(&dr->dr_ref, 0); dr->dr_bio_count = bio_count; dr->dr_error = 0; @@ -426,7 +423,6 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) { dio_request_t *dr = bio->bi_private; int rc; - int wait; if (dr->dr_error == 0) { #ifdef HAVE_1ARG_BIO_END_IO_T @@ -439,13 +435,8 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) #endif } - wait = dr->dr_wait; /* Drop reference aquired by __vdev_disk_physio */ rc = vdev_disk_dio_put(dr); - - /* Wake up synchronous waiter this is the last outstanding bio */ - if (wait && rc == 1) - complete(&dr->dr_comp); } static inline unsigned long @@ -527,7 +518,7 @@ vdev_submit_bio(struct bio *bio) static 
int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, - size_t kbuf_size, uint64_t kbuf_offset, int rw, int flags, int wait) + size_t kbuf_size, uint64_t kbuf_offset, int rw, int flags) { dio_request_t *dr; caddr_t bio_ptr; @@ -546,7 +537,6 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, bio_set_flags_failfast(bdev, &flags); dr->dr_zio = zio; - dr->dr_wait = wait; /* * When the IO size exceeds the maximum bio size for the request @@ -610,33 +600,20 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, if (dr->dr_bio[i]) vdev_submit_bio(dr->dr_bio[i]); - /* - * On synchronous blocking requests we wait for all bio the completion - * callbacks to run. We will be woken when the last callback runs - * for this dio. We are responsible for putting the last dio_request - * reference will in turn put back the last bio references. The - * only synchronous consumer is vdev_disk_read_rootlabel() all other - * IO originating from vdev_disk_io_start() is asynchronous. 
- */ - if (wait) { - wait_for_completion(&dr->dr_comp); - error = dr->dr_error; - ASSERT3S(atomic_read(&dr->dr_ref), ==, 1); - } - (void) vdev_disk_dio_put(dr); return (error); } +#ifndef __linux__ int vdev_disk_physio(struct block_device *bdev, caddr_t kbuf, size_t size, uint64_t offset, int rw, int flags) { bio_set_flags_failfast(bdev, &flags); - return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, rw, flags, - 1)); + return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, rw, flags)); } +#endif BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc) { @@ -688,7 +665,6 @@ vdev_disk_io_start(zio_t *zio) { vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - zio_priority_t pri = zio->io_priority; int rw, flags, error; switch (zio->io_type) { @@ -729,18 +705,24 @@ vdev_disk_io_start(zio_t *zio) return; case ZIO_TYPE_WRITE: rw = WRITE; - if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot)) - flags = WRITE_SYNC; - else - flags = 0; +#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG) + flags = (1 << BIO_RW_UNPLUG); +#elif defined(REQ_UNPLUG) + flags = REQ_UNPLUG; +#else + flags = 0; +#endif break; case ZIO_TYPE_READ: rw = READ; - if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot)) - flags = READ_SYNC; - else - flags = 0; +#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG) + flags = (1 << BIO_RW_UNPLUG); +#elif defined(REQ_UNPLUG) + flags = REQ_UNPLUG; +#else + flags = 0; +#endif break; default: @@ -750,7 +732,7 @@ vdev_disk_io_start(zio_t *zio) } error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, - zio->io_size, zio->io_offset, rw, flags, 0); + zio->io_size, zio->io_offset, rw, flags); if (error) { zio->io_error = error; zio_interrupt(zio); @@ -820,6 +802,7 @@ vdev_ops_t vdev_disk_ops = { B_TRUE /* leaf vdev */ }; +#ifndef __linux__ /* * Given the root disk device devid or pathname, read the label from * the device, and construct a configuration nvlist. 
@@ -883,6 +866,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) return (0); } +#endif /* !__linux__ */ module_param(zfs_vdev_scheduler, charp, 0644); MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");