Skip to content

Commit

Permalink
compute fletcher 4 with avx instructions
Browse files Browse the repository at this point in the history
Detect whether the running CPU supports AVX instructions, evaluate
Fletcher-4 computation throughput, and choose the fastest implementation.

Signed-off-by: Jinshan Xiong <[email protected]>
  • Loading branch information
Jinshan Xiong committed Feb 25, 2016
1 parent 19a47cb commit 151633e
Show file tree
Hide file tree
Showing 6 changed files with 413 additions and 38 deletions.
3 changes: 2 additions & 1 deletion include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ KERNEL_H = \
$(top_srcdir)/include/sys/zfs_ioctl.h \
$(top_srcdir)/include/sys/zfs_onexit.h \
${top_srcdir}/include/sys/zpl.h \
$(top_srcdir)/include/sys/zvol.h
$(top_srcdir)/include/sys/zvol.h \
$(top_srcdir)/include/sys/platform_cpu_compat.h

USER_H =

Expand Down
46 changes: 46 additions & 0 deletions include/sys/platform_cpu_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (C) 2015 Intel Corporation.
*/

/*
 * Thin compatibility layer giving platform-neutral names to the CPU and
 * FPU primitives used by the accelerated checksum code.  Only a Linux
 * mapping is provided in this header.
 */
#ifndef _ZFS_CPU_COMPAT_H
#define _ZFS_CPU_COMPAT_H

#include <sys/disp.h>

#ifdef __linux__
#include <asm/cpufeature.h>

/*
 * HAVE_KERNEL_CPU_AVX2 is a compile-time switch: it is set only when the
 * cpu_has_avx2 macro is defined at this point (presumably supplied by
 * <asm/cpufeature.h> on kernels that know about AVX2 -- confirm against
 * the target kernel headers).  It says nothing about the CPU the code
 * eventually runs on.
 */
#ifdef cpu_has_avx2
#include <asm/i387.h>

#define HAVE_KERNEL_CPU_AVX2 1
#endif /* cpu_has_avx2 */


/*
 * Platform-neutral wrappers over the Linux primitives.
 * NOTE(review): <asm/i387.h> is only included inside the cpu_has_avx2
 * branch above, so users of kernel_fpu_save()/kernel_fpu_restore()
 * outside that branch must get kernel_fpu_begin()/kernel_fpu_end() from
 * elsewhere -- confirm.
 */
#define kernel_cpu_relax() cpu_relax()
#define kernel_fpu_save() kernel_fpu_begin()
#define kernel_fpu_restore() kernel_fpu_end()
#endif /* __linux__ */

#endif /* _ZFS_CPU_COMPAT_H */
10 changes: 10 additions & 0 deletions include/zfs_fletcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ extern "C" {
* fletcher checksum functions
*/

/*
 * Operations table describing one Fletcher-4 implementation.
 *
 * Callers invoke init(), then compute() or compute_byteswap(), then
 * fini() when it is non-NULL.  A NULL valid() hook is treated as
 * "always usable".
 */
struct fletcher_4_calls {
	/* Prepares *zcp before any data is fed in; always called. */
	void (*init)(zio_cksum_t *zcp);
	/* Optional hook run after the computation; may be NULL. */
	void (*fini)(zio_cksum_t *zcp);
	/* Processes size bytes at buf in native byte order. */
	void (*compute)(const void *buf, uint64_t size, zio_cksum_t *zcp);
	/* Same as compute, for data in the opposite byte order. */
	void (*compute_byteswap)(const void *buf, uint64_t size,
	    zio_cksum_t *zcp);
	/* Optional runtime check that this implementation can be used. */
	boolean_t (*valid)(void);
	/* Short label printed alongside the benchmark result. */
	const char *name;
};

void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
Expand All @@ -45,6 +54,7 @@ void fletcher_4_incremental_native(const void *, uint64_t,
zio_cksum_t *);
void fletcher_4_incremental_byteswap(const void *, uint64_t,
zio_cksum_t *);
/*
 * Benchmarks the available Fletcher-4 implementations and selects the
 * fastest one.  The definition in zfs_fletcher.c is compiled only when
 * both _KERNEL and HAVE_SPL are defined.
 */
void fletcher_4_init(void);

#ifdef __cplusplus
}
Expand Down
139 changes: 102 additions & 37 deletions module/zcommon/zfs_fletcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@
#include <sys/byteorder.h>
#include <sys/zio.h>
#include <sys/spa.h>
#include <zfs_fletcher.h>

void
fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
Expand Down Expand Up @@ -165,43 +166,13 @@ fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
}

void
fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
static void fletcher_4_generic_init(zio_cksum_t *zcp)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
uint64_t a, b, c, d;

for (a = b = c = d = 0; ip < ipend; ip++) {
a += ip[0];
b += a;
c += b;
d += c;
}

ZIO_SET_CHECKSUM(zcp, a, b, c, d);
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}

void
fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
uint64_t a, b, c, d;

for (a = b = c = d = 0; ip < ipend; ip++) {
a += BSWAP_32(ip[0]);
b += a;
c += b;
d += c;
}

ZIO_SET_CHECKSUM(zcp, a, b, c, d);
}

void
fletcher_4_incremental_native(const void *buf, uint64_t size,
zio_cksum_t *zcp)
static void
fletcher_4_generic(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
Expand All @@ -222,9 +193,8 @@ fletcher_4_incremental_native(const void *buf, uint64_t size,
ZIO_SET_CHECKSUM(zcp, a, b, c, d);
}

void
fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
zio_cksum_t *zcp)
static void
fletcher_4_generic_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
Expand All @@ -245,7 +215,102 @@ fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
ZIO_SET_CHECKSUM(zcp, a, b, c, d);
}

static const struct fletcher_4_calls fletcher_4_generic_calls = {
.init = fletcher_4_generic_init,
.compute = fletcher_4_generic,
.compute_byteswap = fletcher_4_generic_byteswap,
.name = "generic"
};

/*
 * Implementation used by fletcher_4_native() and fletcher_4_byteswap().
 * Starts out as the generic one; fletcher_4_init() repoints it at
 * whichever candidate benchmarks fastest.
 */
static const struct fletcher_4_calls *chosen = &fletcher_4_generic_calls;

/*
 * Compute the Fletcher-4 checksum of size bytes at buf, in native byte
 * order, using the currently selected implementation.  The result is
 * left in *zcp.
 */
void
fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
	const struct fletcher_4_calls *ops = chosen;

	ops->init(zcp);
	ops->compute(buf, size, zcp);

	/* The teardown hook is optional. */
	if (ops->fini == NULL)
		return;
	ops->fini(zcp);
}

/*
 * Byte-swapping counterpart of fletcher_4_native(): runs the selected
 * implementation's compute_byteswap hook over size bytes at buf and
 * leaves the result in *zcp.
 */
void
fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
	const struct fletcher_4_calls *ops = chosen;

	ops->init(zcp);
	ops->compute_byteswap(buf, size, zcp);

	/* The teardown hook is optional. */
	if (ops->fini == NULL)
		return;
	ops->fini(zcp);
}

/*
 * Incremental native-order entry point.  Always uses the generic
 * routine, never the benchmarked selection, and runs no init hook, so
 * *zcp reaches fletcher_4_generic() exactly as the caller left it.
 * NOTE(review): presumably *zcp carries the running checksum from the
 * previous call -- confirm against the body of fletcher_4_generic().
 */
void
fletcher_4_incremental_native(const void *buf, uint64_t size,
zio_cksum_t *zcp)
{
fletcher_4_generic(buf, size, zcp);
}

/*
 * Incremental byte-swapped entry point.  Always uses the generic
 * byteswap routine, never the benchmarked selection, and runs no init
 * hook, so *zcp reaches fletcher_4_generic_byteswap() exactly as the
 * caller left it.
 * NOTE(review): presumably *zcp carries the running checksum from the
 * previous call -- confirm against fletcher_4_generic_byteswap().
 */
void
fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
zio_cksum_t *zcp)
{
fletcher_4_generic_byteswap(buf, size, zcp);
}

#if defined(_KERNEL) && defined(HAVE_SPL)
#include <sys/platform_cpu_compat.h>

#ifdef HAVE_KERNEL_CPU_AVX2
#include "zfs_fletcher_intel.c"
#endif

/*
 * Candidate implementations, in the order fletcher_4_init() benchmarks
 * them.  The AVX2 entry exists only when the build enabled it.
 */
static const struct fletcher_4_calls *fletcher_4_algos[] = {
	&fletcher_4_generic_calls,
#ifdef HAVE_KERNEL_CPU_AVX2
	&fletcher_4_avx2_calls,
#endif
};

/*
 * Benchmark every usable Fletcher-4 implementation and point `chosen`
 * at the fastest one.
 *
 * Each candidate checksums PAGE_SIZE bytes repeatedly for a window of
 * (1 << bits) ticks of lbolt; the iteration count is its score.  The
 * score is also converted to MB/s and logged per candidate.
 */
void
fletcher_4_init(void)
{
	unsigned long bestperf = 0;
	/*
	 * The test data is whatever lies at the current instruction
	 * address.  NOTE(review): confirm that PAGE_SIZE bytes starting
	 * there are always mapped and readable.
	 */
	const void *databuf = current_text_addr();
	/* log2 of the benchmark window length, in ticks. */
	const unsigned int bits = 4;
	int i;

	for (i = 0; i < ARRAY_SIZE(fletcher_4_algos); i++) {
		const struct fletcher_4_calls *algo = fletcher_4_algos[i];
		unsigned long perf = 0;
		unsigned long long mbps;
		clock_t j0, j1;
		zio_cksum_t zc;

		/* A missing valid() hook means "always usable". */
		if (algo->valid != NULL && !algo->valid())
			continue;

		kpreempt_disable();

		/* Spin until lbolt changes so the window starts on a tick. */
		j0 = lbolt;
		while ((j1 = lbolt) == j0)
			kernel_cpu_relax();

		algo->init(&zc);
		while (ddi_time_before(lbolt, j1 + (1 << bits))) {
			algo->compute(databuf, PAGE_SIZE, &zc);
			perf++;
		}
		if (algo->fini != NULL)
			algo->fini(&zc);
		kpreempt_enable();

		if (perf > bestperf) {
			bestperf = perf;
			chosen = algo;
		}

		/*
		 * perf iterations of PAGE_SIZE bytes ran in (1 << bits)
		 * ticks.  Scale by ticks per second and by the bytes per
		 * iteration, then divide by the window length and by 2^20
		 * to get MB/s.  The bytes-per-iteration factor must be
		 * PAGE_SIZE rather than a fixed 64 KiB, otherwise the
		 * figure is inflated on systems with smaller pages.
		 */
		mbps = ((unsigned long long)SEC_TO_TICK(perf) * PAGE_SIZE) >>
		    (20 + bits);
		cmn_err(CE_NOTE, "fletcher-4: %-8s %5llu MB/s",
		    algo->name, mbps);
	}
}

EXPORT_SYMBOL(fletcher_4_init);
EXPORT_SYMBOL(fletcher_2_native);
EXPORT_SYMBOL(fletcher_2_byteswap);
EXPORT_SYMBOL(fletcher_4_native);
Expand Down
Loading

0 comments on commit 151633e

Please sign in to comment.