diff --git a/include/sys/blake3.h b/include/sys/blake3.h
index e6650372ccda..b3391c5f2349 100644
--- a/include/sys/blake3.h
+++ b/include/sys/blake3.h
@@ -92,6 +92,11 @@ void Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out);
 void Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
     size_t out_len);
 
+/* pre-allocated per-CPU contexts; defined in _KERNEL builds only */
+extern void **blake3_per_cpu_ctx;
+extern void blake3_per_cpu_ctx_init(void);
+extern void blake3_per_cpu_ctx_fini(void);
+
 /* return number of supported implementations */
 extern int blake3_get_impl_count(void);
 
diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c
index c3268ec13dad..c3809a2827be 100644
--- a/module/icp/algs/blake3/blake3_impl.c
+++ b/module/icp/algs/blake3/blake3_impl.c
@@ -201,6 +201,39 @@ blake3_impl_get_ops(void)
 	return (blake3_selected_impl);
 }
 
+#if defined(_KERNEL)
+/* One pre-allocated BLAKE3_CTX per CPU, indexed by CPU sequence id. */
+void **blake3_per_cpu_ctx;
+
+/*
+ * Allocate the per-CPU context table and one BLAKE3_CTX for each of the
+ * max_ncpus slots.
+ */
+void
+blake3_per_cpu_ctx_init(void)
+{
+	blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
+	for (int i = 0; i < max_ncpus; i++) {
+		blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
+		    KM_SLEEP);
+	}
+}
+
+/*
+ * Zero and free every per-CPU context, then the table itself.
+ */
+void
+blake3_per_cpu_ctx_fini(void)
+{
+	for (int i = 0; i < max_ncpus; i++) {
+		memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
+		kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
+	}
+	memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
+	kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
+}
+#endif
+
 #if defined(_KERNEL) && defined(__linux__)
 static int
 icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
diff --git a/module/zfs/blake3_zfs.c b/module/zfs/blake3_zfs.c
index 51c455fe7237..7560f30fd4e4 100644
--- a/module/zfs/blake3_zfs.c
+++ b/module/zfs/blake3_zfs.c
@@ -47,18 +47,29 @@ void
 abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template,
     zio_cksum_t *zcp)
 {
-	BLAKE3_CTX *ctx;
-
-	ctx = kmem_alloc(sizeof (*ctx), KM_NOSLEEP);
-	ASSERT(ctx != 0);
 	ASSERT(ctx_template != 0);
 
+#if defined(_KERNEL)
+	/*
+	 * The per-CPU context is only private to this thread while it stays
+	 * on this CPU, so keep preemption off until the digest is written.
+	 */
+	kpreempt_disable();
+	BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID];
+#else
+	BLAKE3_CTX *ctx = kmem_alloc(sizeof (*ctx), KM_SLEEP);
+#endif
+
 	memcpy(ctx, ctx_template, sizeof (*ctx));
 	(void) abd_iterate_func(abd, 0, size, blake3_incremental, ctx);
 	Blake3_Final(ctx, (uint8_t *)zcp);
 
+#if defined(_KERNEL)
+	kpreempt_enable();
+#else
 	memset(ctx, 0, sizeof (*ctx));
 	kmem_free(ctx, sizeof (*ctx));
+#endif
 }
 
 /*
diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c
index 3ebe08541b0b..639784287d72 100644
--- a/module/zfs/zfs_chksum.c
+++ b/module/zfs/zfs_chksum.c
@@ -277,6 +277,9 @@ chksum_benchmark(void)
 void
 chksum_init(void)
 {
+#ifdef _KERNEL
+	blake3_per_cpu_ctx_init();
+#endif
 
 	/* Benchmark supported implementations */
 	chksum_benchmark();
@@ -313,4 +316,8 @@ chksum_fini(void)
 		chksum_stat_cnt = 0;
 		chksum_stat_data = 0;
 	}
+
+#ifdef _KERNEL
+	blake3_per_cpu_ctx_fini();
+#endif
 }