Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance vdev layer to maintain logical and physical block sizes #1671

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1294,12 +1294,13 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
int namewidth, int depth, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
uint_t c, vsc, children;
pool_scan_stat_t *ps = NULL;
vdev_stat_t *vs;
char rbuf[6], wbuf[6], cbuf[6];
char *vname;
uint64_t notpresent;
uint64_t ashift;
spare_cbdata_t cb;
char *state;

Expand All @@ -1308,7 +1309,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
children = 0;

verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
(uint64_t **)&vs, &vsc) == 0);

state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
if (isspare) {
Expand Down Expand Up @@ -1361,6 +1362,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("unsupported feature(s)"));
break;

case VDEV_AUX_ASHIFT_TOO_BIG:
(void) printf(gettext("unsupported minimum blocksize"));
break;

case VDEV_AUX_SPARED:
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
&cb.cb_guid) == 0);
Expand Down Expand Up @@ -1403,6 +1408,12 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("corrupted data"));
break;
}
} else if (children == 0 && !isspare &&
VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
vs->vs_configured_ashift < vs->vs_physical_ashift) {
(void) printf(
gettext(" block size: %dB configured, %dB native"),
1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift);
}

(void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS,
Expand Down Expand Up @@ -4322,6 +4333,15 @@ status_callback(zpool_handle_t *zhp, void *data)
"'zpool clear'.\n"));
break;

case ZPOOL_STATUS_NON_NATIVE_ASHIFT:
(void) printf(gettext("status: One or more devices are "
"configured to use a non-native block size.\n"
"\tExpect reduced performance.\n"));
(void) printf(gettext("action: Replace affected devices with "
"devices that support the\n\tconfigured block size, or "
"migrate data to a properly configured\n\tpool.\n"));
break;

default:
/*
* The remaining errors can't actually be generated, yet.
Expand Down
1 change: 1 addition & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ typedef enum {
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
ZPOOL_STATUS_OFFLINE_DEV, /* device offline */
ZPOOL_STATUS_REMOVED_DEV, /* removed device */
ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */

/*
* Finally, the following indicates a healthy pool.
Expand Down
12 changes: 9 additions & 3 deletions include/linux/blkdev_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,15 +416,21 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
* the logical block size interface and then the older hard sector size.
*/
#ifdef HAVE_BDEV_PHYSICAL_BLOCK_SIZE
# define vdev_bdev_block_size(bdev) bdev_physical_block_size(bdev)
# define vdev_bdev_physical_block_size(bdev) bdev_physical_block_size(bdev)
#else
# ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE
# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev)
# define vdev_bdev_physical_block_size(bdev) bdev_logical_block_size(bdev)
# else
# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev)
# define vdev_bdev_physical_block_size(bdev) bdev_hardsect_size(bdev)
# endif /* HAVE_BDEV_LOGICAL_BLOCK_SIZE */
#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE */

/*
 * Logical block size of a block device.  Use bdev_logical_block_size()
 * when HAVE_BDEV_LOGICAL_BLOCK_SIZE is defined; otherwise fall back to
 * the older hard sector size interface, bdev_hardsect_size().
 */
#ifndef HAVE_BDEV_LOGICAL_BLOCK_SIZE
# define vdev_bdev_logical_block_size(bdev) bdev_hardsect_size(bdev)
#else
# define vdev_bdev_logical_block_size(bdev) bdev_logical_block_size(bdev)
#endif /* !HAVE_BDEV_LOGICAL_BLOCK_SIZE */

/*
* 2.6.37 API change
* The WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags have been
Expand Down
14 changes: 13 additions & 1 deletion include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,8 @@ typedef enum vdev_aux {
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
VDEV_AUX_EXTERNAL, /* external diagnosis */
VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
VDEV_AUX_ASHIFT_TOO_BIG /* vdev's min block size is too large */
} vdev_aux_t;

/*
Expand Down Expand Up @@ -707,8 +708,19 @@ typedef struct vdev_stat {
uint64_t vs_self_healed; /* self-healed bytes */
uint64_t vs_scan_removing; /* removing? */
uint64_t vs_scan_processed; /* scan processed bytes */
uint64_t vs_configured_ashift; /* TLV vdev_ashift */
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
} vdev_stat_t;

/*
 * Fallback offsetof() for environments that have not already defined it:
 * evaluates to the byte offset of member m within the type s.
 */
#if !defined(offsetof)
#define offsetof(s, m) ((size_t)&((s *)0)->m)
#endif

/*
 * Report whether a vdev_stat_t that was obtained as an array of
 * uint64_t_field_count uint64_t elements is long enough to contain
 * `field`, i.e. whether the array extends at least to the end of that
 * member.  Evaluates to non-zero when the member is fully covered.
 *
 * field                 - name of a vdev_stat_t member
 * uint64_t_field_count  - number of uint64_t elements in the array
 *
 * The count argument is parenthesized so that compound expressions
 * (e.g. `a + b`) are scaled as a whole rather than having only their
 * last operand multiplied by sizeof (uint64_t).
 */
#define VDEV_STAT_VALID(field, uint64_t_field_count) \
	(((uint64_t_field_count) * sizeof (uint64_t)) >= \
	(offsetof(vdev_stat_t, field) + \
	sizeof (((vdev_stat_t *)NULL)->field)))

/*
* DDT statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
Expand Down
1 change: 1 addition & 0 deletions include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_minblocksize(metaslab_class_t *mc);

extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
vdev_t *vd);
Expand Down
1 change: 1 addition & 0 deletions include/sys/metaslab_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct metaslab_class {
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
uint64_t mc_dspace; /* total deflated space */
uint64_t mc_minblocksize;
kmutex_t mc_fastwrite_lock;
};

Expand Down
11 changes: 11 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ struct dsl_pool;

#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)

/*
* Maximum supported logical ashift.
*
* The current 8k allocation block size limit is due to the 8k
* aligned/sized operations performed by vdev_probe() on
* vdev_label->vl_pad2. Using another "safe region" for these tests
* would allow the limit to be raised to 16k, at the expense of
* only having 8 available uberblocks in the label area.
*/
#define SPA_MAXASHIFT 13

/*
* Size of block to hold the configuration data (a packed nvlist)
*/
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ extern void vdev_rele(vdev_t *);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_ashift_optimize(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd);
Expand Down
20 changes: 19 additions & 1 deletion include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ typedef struct vdev_cache_entry vdev_cache_entry_t;
* Virtual device operations
*/
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
uint64_t *ashift);
uint64_t *logical_ashift, uint64_t *physical_ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
Expand Down Expand Up @@ -131,6 +131,24 @@ struct vdev {
uint64_t vdev_min_asize; /* min acceptable asize */
uint64_t vdev_max_asize; /* max acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
/*
* Logical block alignment shift
*
* The smallest sized/aligned I/O supported by the device.
*/
uint64_t vdev_logical_ashift;
/*
* Physical block alignment shift
*
* The device supports logical I/Os with vdev_logical_ashift
* size/alignment, but optimum performance will be achieved by
* aligning/sizing requests to vdev_physical_ashift. Smaller
* requests may be inflated or incur device level read-modify-write
* operations.
*
* May be 0 to indicate no preference (i.e. use vdev_logical_ashift).
*/
uint64_t vdev_physical_ashift;
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
vdev_ops_t *vdev_ops; /* vdev operations */
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zio_compress.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ extern int lz4_decompress(void *src, void *dst, size_t s_len, size_t d_len,
* Compress and decompress data if necessary.
*/
extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
size_t s_len);
size_t s_len, size_t minblocksize);
extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
size_t s_len, size_t d_len);

Expand Down
Loading