Skip to content

Commit

Permalink
Tiered early abort, zstd edition.
Browse files Browse the repository at this point in the history
It turns out that "do LZ4 and zstd-1 both fail" is a great heuristic
for "don't even bother trying higher zstd tiers".

By way of illustration:
$ cat /incompress | mbuffer | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_normal
summary: 39.8 GiByte in  3min 40.2sec - average of  185 MiB/s
$ echo 3 | sudo tee /sys/module/zzstd/parameters/zstd_lz4_pass
3
$ cat /incompress | mbuffer -m 4G | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_patched
summary: 39.8 GiByte in 48.6sec - average of  839 MiB/s
$ sudo zfs list -p -o name,used,lused,ratio evenfaster/lowcomp_1M_zstd12_normal evenfaster/lowcomp_1M_zstd12_patched
NAME                                         USED        LUSED  RATIO
evenfaster/lowcomp_1M_zstd12_normal   39549931520  42721221632   1.08
evenfaster/lowcomp_1M_zstd12_patched  39626399744  42721217536   1.07
$ python3 -c "print(39626399744 - 39549931520)"
76468224
$

I'll take 76 MB out of 42 GB for a > 4x speedup.

Signed-off-by: Rich Ercolani <[email protected]>
  • Loading branch information
rincebrain committed Apr 24, 2022
1 parent 7eba389 commit 8e5bc91
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 15 deletions.
4 changes: 4 additions & 0 deletions include/sys/zstd/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ typedef struct zfs_zstd_meta {
* kstat helper macros
*/
/* Expands to the 64-bit value field of the named counter in zstd_stats. */
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
/* Atomically stores 0 into the named counter. */
#define ZSTDSTAT_ZERO(stat) \
(atomic_store_64(&zstd_stats.stat.value.ui64, 0))
/* Atomically adds val to the named counter. */
#define ZSTDSTAT_ADD(stat, val) \
atomic_add_64(&zstd_stats.stat.value.ui64, (val))
#define ZSTDSTAT_SUB(stat, val) \
Expand All @@ -90,6 +92,8 @@ void zstd_fini(void);

size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level);
size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level);
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
size_t d_len, uint8_t *level);
Expand Down
17 changes: 9 additions & 8 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -9337,26 +9337,27 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
}

if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
cabd = abd_alloc_for_io(asize, ismd);
tmp = abd_borrow_buf(cabd, asize);
cabd = abd_alloc_for_io(size, ismd);
tmp = abd_borrow_buf(cabd, size);

psize = zio_compress_data(compress, to_write, tmp, size,
hdr->b_complevel);

if (psize >= size) {
abd_return_buf(cabd, tmp, asize);
if (psize >= asize) {
psize = HDR_GET_PSIZE(hdr);
abd_return_buf_copy(cabd, tmp, size);
HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
to_write = cabd;
abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
if (size != asize)
abd_zero_off(to_write, size, asize - size);
abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
if (psize != asize)
abd_zero_off(to_write, psize, asize - psize);
goto encrypt;
}
ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
if (psize < asize)
memset((char *)tmp + psize, 0, asize - psize);
psize = HDR_GET_PSIZE(hdr);
abd_return_buf_copy(cabd, tmp, asize);
abd_return_buf_copy(cabd, tmp, size);
to_write = cabd;
}

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zio_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
{"zle", 64, zle_compress, zle_decompress, NULL},
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress,
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap,
zfs_zstd_decompress, zfs_zstd_decompress_level},
};

Expand Down
128 changes: 122 additions & 6 deletions module/zstd/zfs_zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,13 @@
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"

/*
 * Tunables for the early-abort heuristic in zfs_zstd_compress_wrap().
 */
/* The heuristic is attempted only while this is greater than zero. */
static int zstd_earlyabort_pass = 1;
/*
 * The heuristic is attempted only when the translated zstd level is at
 * least this value.
 */
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
/* The heuristic is attempted only on source buffers of at least this size. */
static unsigned int zstd_abort_size = (128 * 1024);

static kstat_t *zstd_ksp = NULL;


typedef struct zstd_stats {
kstat_named_t zstd_stat_alloc_fail;
kstat_named_t zstd_stat_alloc_fallback;
Expand All @@ -62,6 +67,21 @@ typedef struct zstd_stats {
kstat_named_t zstd_stat_dec_header_inval;
kstat_named_t zstd_stat_com_fail;
kstat_named_t zstd_stat_dec_fail;
/*
* LZ4 first-pass early abort verdict
*/
kstat_named_t zstd_stat_lz4pass_allowed;
kstat_named_t zstd_stat_lz4pass_rejected;
/*
* zstd-1 second-pass early abort verdict
*/
kstat_named_t zstd_stat_zstdpass_allowed;
kstat_named_t zstd_stat_zstdpass_rejected;
/*
* We excluded this from early abort for some reason
*/
kstat_named_t zstd_stat_passignored;
kstat_named_t zstd_stat_passignored_size;
kstat_named_t zstd_stat_buffers;
kstat_named_t zstd_stat_size;
} zstd_stats_t;
Expand All @@ -76,10 +96,44 @@ static zstd_stats_t zstd_stats = {
{ "decompress_header_invalid", KSTAT_DATA_UINT64 },
{ "compress_failed", KSTAT_DATA_UINT64 },
{ "decompress_failed", KSTAT_DATA_UINT64 },
{ "lz4pass_allowed", KSTAT_DATA_UINT64 },
{ "lz4pass_rejected", KSTAT_DATA_UINT64 },
{ "zstdpass_allowed", KSTAT_DATA_UINT64 },
{ "zstdpass_rejected", KSTAT_DATA_UINT64 },
{ "passignored", KSTAT_DATA_UINT64 },
{ "passignored_size", KSTAT_DATA_UINT64 },
{ "buffers", KSTAT_DATA_UINT64 },
{ "size", KSTAT_DATA_UINT64 },
};

#ifdef _KERNEL
/*
 * kstat update callback for the zstd statistics.
 *
 * A write (rw == KSTAT_WRITE) directed at zstd_ksp resets the event counters
 * below to zero; any other call is a no-op.  zstd_stat_buffers and
 * zstd_stat_size are not reset here.
 *
 * Always returns 0.
 */
static int
kstat_zstd_update(kstat_t *ksp, int rw)
{
	ASSERT(ksp != NULL);

	/* Nothing to do unless this is a write aimed at our own kstat. */
	if (rw != KSTAT_WRITE || ksp != zstd_ksp)
		return (0);

	/* Allocation, validation and (de)compression failure counters. */
	ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
	ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_com_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
	ZSTDSTAT_ZERO(zstd_stat_com_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_fail);

	/* Early-abort verdict counters. */
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_passignored);
	ZSTDSTAT_ZERO(zstd_stat_passignored_size);

	return (0);
}
#endif

/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
ZSTD_KMEM_UNKNOWN = 0,
Expand Down Expand Up @@ -377,6 +431,64 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
}


/*
 * Compress a block with zstd, preceded by an optional early-abort heuristic
 * that avoids spending time at an expensive level on data that cheap
 * compressors already fail to shrink.
 *
 * s_start/s_len describe the source buffer, d_start/d_len the destination
 * buffer, and level is the ZIO zstd level enum value.
 *
 * Returns the result of zfs_zstd_compress() at the requested level, or s_len
 * (the same value zfs_zstd_compress() returns on its failure paths) when the
 * level cannot be translated or the heuristic rejects the buffer.
 */
size_t
zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	int16_t zstd_level;

	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	/*
	 * A zstd early abort heuristic.
	 *
	 * - Zeroth, if the heuristic is disabled, the level is below
	 *   zstd_cutoff_level (default zstd-3), or the buffer is smaller
	 *   than zstd_abort_size (default 128k), don't try any of this,
	 *   just go.
	 *   (because experimentally that was a reasonable cutoff for a perf
	 *   win with tiny ratio change)
	 * - First, we try LZ4 compression, and if its output is smaller than
	 *   d_len, we jump directly to whatever compression level we
	 *   intended to try.
	 * - Second, we try zstd-1 - if that errors out (usually, but not
	 *   exclusively, if it would overflow), we give up early.
	 *
	 * If it works, instead we go on and compress anyway.
	 *
	 * Why two passes? LZ4 alone gets you a lot of the way, but on highly
	 * compressible data, it was losing up to 8.5% of the compressed
	 * savings versus no early abort, and all the zstd-fast levels are
	 * worse indications on their own than LZ4, and don't improve the LZ4
	 * pass noticeably if stacked like this.
	 */

	/* Read the tunable once so both size checks below agree. */
	size_t actual_abort_size = zstd_abort_size;

	if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
	    s_len >= actual_abort_size) {
		/*
		 * Both trial passes write into d_start; whatever they leave
		 * there is overwritten by the final compression below.
		 */
		size_t pass_len;

		pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
		if (pass_len < d_len) {
			ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
			goto keep_trying;
		}
		ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);

		pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
		    ZIO_ZSTD_LEVEL_1);
		if (pass_len == s_len || pass_len == 0 || pass_len > d_len) {
			ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
			return (s_len);
		}
		ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
	} else {
		ZSTDSTAT_BUMP(zstd_stat_passignored);
		if (s_len < actual_abort_size) {
			ZSTDSTAT_BUMP(zstd_stat_passignored_size);
		}
	}

keep_trying:
	return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
}

/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
Expand Down Expand Up @@ -409,7 +521,6 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
return (s_len);
}

/* Set the compression level */
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

Expand Down Expand Up @@ -437,8 +548,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
* too small, that is not a failure. Everything else is a
* failure, so increment the compression failure counter.
*/
if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
int err = ZSTD_getErrorCode(c_len);
if (err != ZSTD_error_dstSize_tooSmall) {
ZSTDSTAT_BUMP(zstd_stat_com_fail);
zfs_dbgmsg("Error: %s", ZSTD_getErrorString(err));
}
return (s_len);
}
Expand Down Expand Up @@ -753,6 +866,9 @@ zstd_init(void)
if (zstd_ksp != NULL) {
zstd_ksp->ks_data = &zstd_stats;
kstat_install(zstd_ksp);
#ifdef _KERNEL
zstd_ksp->ks_update = kstat_zstd_update;
#endif
}

return (0);
Expand Down Expand Up @@ -781,8 +897,8 @@ module_init(zstd_init);
module_exit(zstd_fini);
#endif

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW,
"Should we try early abort when using zstd?");
ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
"Only >= this many bytes should we try early abort");
#endif

0 comments on commit 8e5bc91

Please sign in to comment.