Skip to content

Commit

Permalink
Adapt jxiong:vectorized_fletcher (openzfs#4330) to
Browse files Browse the repository at this point in the history
  • Loading branch information
ironMann committed Mar 16, 2016
1 parent b378ac3 commit 31b22fb
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 110 deletions.
4 changes: 2 additions & 2 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
$(top_srcdir)/include/sys/spa_checksum.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
Expand Down Expand Up @@ -104,8 +105,7 @@ KERNEL_H = \
$(top_srcdir)/include/sys/zfs_ioctl.h \
$(top_srcdir)/include/sys/zfs_onexit.h \
${top_srcdir}/include/sys/zpl.h \
$(top_srcdir)/include/sys/zvol.h \
$(top_srcdir)/include/sys/platform_cpu_compat.h
$(top_srcdir)/include/sys/zvol.h

USER_H =

Expand Down
46 changes: 0 additions & 46 deletions include/sys/platform_cpu_compat.h

This file was deleted.

33 changes: 1 addition & 32 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#include <sys/spa_checksum.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -142,12 +143,6 @@ typedef struct dva {
uint64_t dva_word[2];
} dva_t;

/*
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
*/
typedef struct zio_cksum {
uint64_t zc_word[4];
} zio_cksum_t;

/*
* Each block is described by its DVAs, time of birth, checksum, etc.
Expand Down Expand Up @@ -440,35 +435,9 @@ _NOTE(CONSTCOND) } while (0)
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))

#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
((zc1).zc_word[2] - (zc2).zc_word[2]) | \
((zc1).zc_word[3] - (zc2).zc_word[3])))

#define ZIO_CHECKSUM_IS_ZERO(zc) \
(0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
(zc)->zc_word[2] | (zc)->zc_word[3]))

#define ZIO_CHECKSUM_BSWAP(zcp) \
{ \
(zcp)->zc_word[0] = BSWAP_64((zcp)->zc_word[0]); \
(zcp)->zc_word[1] = BSWAP_64((zcp)->zc_word[1]); \
(zcp)->zc_word[2] = BSWAP_64((zcp)->zc_word[2]); \
(zcp)->zc_word[3] = BSWAP_64((zcp)->zc_word[3]); \
}


/* A DVA counts as valid when DVA_GET_ASIZE() reports a non-zero value for it. */
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)

#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
{ \
(zcp)->zc_word[0] = w0; \
(zcp)->zc_word[1] = w1; \
(zcp)->zc_word[2] = w2; \
(zcp)->zc_word[3] = w3; \
}

/*
 * BP_IDENTITY yields the address of the block pointer's first DVA
 * (blk_dva[0]).  The ASSERT on the left of the comma operator rejects
 * embedded block pointers before the address is taken.
 */
#define BP_IDENTITY(bp) (ASSERT(!BP_IS_EMBEDDED(bp)), &(bp)->blk_dva[0])
/*
 * BP_IS_GANG is B_FALSE for an embedded block pointer; otherwise it is
 * DVA_GET_GANG() applied to the DVA returned by BP_IDENTITY().  The
 * embedded check comes first, so the ASSERT inside BP_IDENTITY() is never
 * reached for an embedded block pointer.
 */
#define BP_IS_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? B_FALSE : DVA_GET_GANG(BP_IDENTITY(bp)))
Expand Down
72 changes: 72 additions & 0 deletions include/sys/spa_checksum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/

#ifndef _SPA_CHECKSUM_H
#define _SPA_CHECKSUM_H

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Per-block checksum: 256 bits, stored as four 64-bit words.  That width
 * is large enough to hold the output of a cryptographic hash.
 */
struct zio_cksum {
	uint64_t	zc_word[4];
};
typedef struct zio_cksum zio_cksum_t;

/*
 * Store the four 64-bit words w0..w3 into the checksum that zcp points to.
 *
 * zcp is expanded (and therefore evaluated) four times, so it must not
 * have side effects; each of w0..w3 is evaluated exactly once.
 *
 * The body is wrapped in do { } while (0) so that the macro together with
 * its trailing semicolon forms exactly one statement.  A bare { } block
 * followed by ';' is two statements, which breaks an unbraced if/else.
 */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
do {						\
	(zcp)->zc_word[0] = (w0);		\
	(zcp)->zc_word[1] = (w1);		\
	(zcp)->zc_word[2] = (w2);		\
	(zcp)->zc_word[3] = (w3);		\
} while (0)

/*
 * Evaluates to 1 when checksums zc1 and zc2 hold the same four words and
 * to 0 otherwise.  Both arguments are checksum structures (accessed with
 * '.'), not pointers.
 *
 * The per-word differences are OR-ed together and tested once, so the
 * comparison has no data-dependent branches; every word of each argument
 * is read, and each argument is expanded four times.
 */
#define	ZIO_CHECKSUM_EQUAL(zc1, zc2)				\
	((((zc1).zc_word[0] ^ (zc2).zc_word[0]) |		\
	((zc1).zc_word[1] ^ (zc2).zc_word[1]) |		\
	((zc1).zc_word[2] ^ (zc2).zc_word[2]) |		\
	((zc1).zc_word[3] ^ (zc2).zc_word[3])) == 0)

/*
 * Evaluates to 1 when all four words of the checksum that zc points to
 * are zero, and to 0 otherwise.  Unlike ZIO_CHECKSUM_EQUAL, the argument
 * here is a pointer (accessed with '->'); it is expanded four times.
 */
#define	ZIO_CHECKSUM_IS_ZERO(zc)				\
	(!((zc)->zc_word[0] | (zc)->zc_word[1] |		\
	(zc)->zc_word[2] | (zc)->zc_word[3]))

/*
 * Apply BSWAP_64() in place to each of the four words of the checksum
 * that zcp points to.
 *
 * zcp is expanded eight times, so it must not have side effects.
 * BSWAP_64 is not defined by this header.
 * NOTE(review): presumably it comes from <sys/byteorder.h>; users of this
 * macro must have it in scope -- confirm.
 *
 * The body is wrapped in do { } while (0) so that the macro together with
 * its trailing semicolon forms exactly one statement and can be used as
 * the unbraced body of an if/else.
 */
#define	ZIO_CHECKSUM_BSWAP(zcp)					\
do {								\
	(zcp)->zc_word[0] = BSWAP_64((zcp)->zc_word[0]);	\
	(zcp)->zc_word[1] = BSWAP_64((zcp)->zc_word[1]);	\
	(zcp)->zc_word[2] = BSWAP_64((zcp)->zc_word[2]);	\
	(zcp)->zc_word[3] = BSWAP_64((zcp)->zc_word[3]);	\
} while (0)

#ifdef __cplusplus
}
#endif

#endif
25 changes: 15 additions & 10 deletions include/zfs_fletcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#define _ZFS_FLETCHER_H

#include <sys/types.h>
#include <sys/spa.h>
#include <sys/spa_checksum.h>

#ifdef __cplusplus
extern "C" {
Expand All @@ -37,15 +37,6 @@ extern "C" {
* fletcher checksum functions
*/

struct fletcher_4_calls {
void (*init)(zio_cksum_t *);
void (*fini)(zio_cksum_t *);
void (*compute)(const void *, uint64_t, zio_cksum_t *);
void (*compute_byteswap)(const void *, uint64_t, zio_cksum_t *);
boolean_t (*valid)(void);
const char *name;
};

void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
Expand All @@ -56,6 +47,20 @@ void fletcher_4_incremental_byteswap(const void *, uint64_t,
zio_cksum_t *);
void fletcher_4_init(void);

/*
 * Declaration of the checksum type used by the callback table below.
 * NOTE(review): <sys/spa_checksum.h> is already included by this header
 * and provides the same typedef, and the prototypes earlier in this
 * header use zio_cksum_t before this point -- this line looks redundant.
 * Confirm, and consider dropping it (repeating a typedef is only valid
 * from C11 on).
 */
typedef struct zio_cksum zio_cksum_t;

/*
 * Entry points describing one fletcher-4 implementation.
 */
struct fletcher_4_calls {
/* called with the running checksum before the compute calls */
void (*init)(zio_cksum_t *);
/* called with the running checksum after the compute calls */
void (*fini)(zio_cksum_t *);
/* checksum a buffer: (data, size, running checksum) */
void (*compute)(const void *, uint64_t, zio_cksum_t *);
/* same signature as compute; presumably for byteswapped data -- confirm */
void (*compute_byteswap)(const void *, uint64_t, zio_cksum_t *);
/* reports whether this implementation may be used */
boolean_t (*valid)(void);
/* presumably a human-readable label for the implementation -- confirm */
const char *name;
};

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions module/zcommon/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ $(MODULE)-objs += zfs_comutil.o
$(MODULE)-objs += zfs_fletcher.o
$(MODULE)-objs += zfs_uio.o
$(MODULE)-objs += zpool_prop.o

$(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
26 changes: 15 additions & 11 deletions module/zcommon/zfs_fletcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/zio.h>
#include <sys/spa.h>
#include <zfs_fletcher.h>

Expand Down Expand Up @@ -256,29 +255,33 @@ fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
fletcher_4_generic_byteswap(buf, size, zcp);
}


#if defined(_KERNEL) && defined(HAVE_SPL)
#include <sys/platform_cpu_compat.h>

#ifdef HAVE_KERNEL_CPU_AVX2
#include "zfs_fletcher_intel.c"
#endif
extern struct fletcher_4_calls fletcher_4_avx2_calls;

static const struct fletcher_4_calls *fletcher_4_algos[] = {
&fletcher_4_generic_calls,
#if defined(HAVE_KERNEL_CPU_AVX2)
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
&fletcher_4_avx2_calls,
#endif
};

/*
 * Size in bytes of the static buffer handed to each benchmark compute
 * call.  fletcher_4_init() passes PAGE_SIZE as the length of that buffer,
 * so track PAGE_SIZE whenever it is available as a macro; a fixed 4096
 * would be over-read on configurations with larger pages.
 */
#ifdef PAGE_SIZE
#define	BENCH_SIZE		PAGE_SIZE
#else
#define	BENCH_SIZE		4096
#endif

/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that any array-valued expression is handled correctly.
 */
#define	ZFS_ARRAY_SIZE(a)	(sizeof (a) / sizeof ((a)[0]))

/* Expands to an empty statement; used as the body of busy-wait loops. */
#define	kernel_cpu_relax()	do {} while (0)

/* Can't use allocation methods from the zfs module, so use static storage. */
static char databuf[BENCH_SIZE];

void
fletcher_4_init(void)
{
unsigned long bestperf = 0;
const void *databuf = current_text_addr();
const unsigned int bits = 4;
int i;

for (i = 0; i < ARRAY_SIZE(fletcher_4_algos); i++) {
for (i = 0; i < ZFS_ARRAY_SIZE(fletcher_4_algos); i++) {
const struct fletcher_4_calls *algo = fletcher_4_algos[i];
unsigned long perf = 0;
clock_t j0, j1;
Expand All @@ -288,12 +291,13 @@ fletcher_4_init(void)
continue;

kpreempt_disable();
j0 = lbolt;
while ((j1 = lbolt) == j0)
j0 = ddi_get_lbolt();
while ((j1 = ddi_get_lbolt()) == j0) {
kernel_cpu_relax();
}

algo->init(&zc);
while (ddi_time_before(lbolt, j1 + (1 << bits))) {
while (ddi_time_before(ddi_get_lbolt(), j1 + (1 << bits))) {
algo->compute(databuf, PAGE_SIZE, &zc);
perf++;
}
Expand Down
23 changes: 14 additions & 9 deletions module/zcommon/zfs_fletcher_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@
* SOFTWARE.
*/

#if defined(HAVE_AVX) && defined(HAVE_AVX2)

#include <linux/simd_x86.h>
#include <sys/spa_checksum.h>
#include <zfs_fletcher.h>

#ifdef UNITTEST

#if defined(_KERNEL)
Expand All @@ -54,28 +60,25 @@
#include <stdbool.h>
#include <string.h>

#define kernel_fpu_save() do {} while (0)
#define kernel_fpu_restore() do {} while (0)
#define cpu_has_avx2 __builtin_cpu_supports("avx2")

typedef unsigned long long rlim64_t;

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/zio.h>
#include <sys/spa.h>
#include <zfs_fletcher.h>

#include "zfs_fletcher.c"
#endif /* #ifdef UNITTEST */



static void
fletcher_4_avx2_init(zio_cksum_t *zcp)
{
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);

kernel_fpu_save();
kfpu_begin();

/* clear avx2 registers */
asm volatile("vpxor %ymm0, %ymm0, %ymm0");
Expand All @@ -98,7 +101,7 @@ fletcher_4_avx2_fini(zio_cksum_t *zcp)
asm volatile("vmovdqa %%ymm2, %0":"=m" (c));
asm volatile("vmovdqa %%ymm3, %0":"=m" (d));

kernel_fpu_restore();
kfpu_end();

A = a[0] + a[1] + a[2] + a[3];
B = 0 - a[1] - 2*a[2] - 3*a[3]
Expand Down Expand Up @@ -155,10 +158,10 @@ fletcher_4_avx2_byteswap(const void *buf, uint64_t size, zio_cksum_t *unused)

static boolean_t fletcher_4_avx2_valid(void)
{
return (cpu_has_avx2);
return (zfs_avx_available() && zfs_avx2_available());
}

static const struct fletcher_4_calls fletcher_4_avx2_calls = {
const struct fletcher_4_calls fletcher_4_avx2_calls = {
.init = fletcher_4_avx2_init,
.fini = fletcher_4_avx2_fini,
.compute = fletcher_4_avx2,
Expand Down Expand Up @@ -249,3 +252,5 @@ main(int argc, char **argv)
return (0);
}
#endif /* #ifdef UNITTEST */

#endif /* defined(HAVE_AVX) && defined(HAVE_AVX2) */

0 comments on commit 31b22fb

Please sign in to comment.