Skip to content

Commit

Permalink
Merge pull request #2288 from jamescowens/PR2286
Browse files Browse the repository at this point in the history
build: add upstream compiler warnings
  • Loading branch information
jamescowens authored Aug 18, 2021
2 parents 9b378fe + 2656083 commit d57ca01
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 70 deletions.
4 changes: 4 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://opensource.org/licenses/mit-license.php.

# Pattern rule to print variables, e.g. make print-top_srcdir
print-%: FORCE
@echo '$*'='$($*)'

ACLOCAL_AMFLAGS = -I build-aux/m4
SUBDIRS = src
.PHONY: deploy FORCE
Expand Down
119 changes: 74 additions & 45 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ AC_ARG_ENABLE([glibc-back-compat],
[use_glibc_compat=no])

AC_ARG_ENABLE([asm],
[AS_HELP_STRING([--enable-asm],
[Enable assembly routines (default is yes)])],
[AS_HELP_STRING([--disable-asm],
[disable assembly routines (enabled by default)])],
[use_asm=$enableval],
[use_asm=yes])

Expand All @@ -196,34 +196,6 @@ case $BUILD_TARGET in
;;
esac

if test x$enable_assembly != xno -a x$have_x86_64 = xtrue
then
AC_MSG_CHECKING(whether we can compile AVX code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vmovdqa %ymm0, %ymm1");])],
AC_DEFINE(ENABLE_AVX, 1, [Define to 1 if AVX assembly is available.])
AC_MSG_RESULT(yes)
AC_MSG_CHECKING(whether we can compile XOP code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vprotd \$7, %xmm0, %xmm1");])],
AC_DEFINE(ENABLE_XOP, 1, [Define to 1 if XOP assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the XOP instruction set.])
)
AC_MSG_CHECKING(whether we can compile AVX2 code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
AC_DEFINE(enable_avx2=yes; ENABLE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX instruction set.])
)
fi

# Enable debug
AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug],
Expand Down Expand Up @@ -257,17 +229,42 @@ if test "x$enable_werror" = "xyes"; then
if test "x$CXXFLAG_WERROR" = "x"; then
AC_MSG_ERROR("enable-werror set but -Werror is not usable")
fi
AX_CHECK_COMPILE_FLAG([-Werror=gnu],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=gnu"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=vla],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=vla"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=thread-safety-analysis],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=thread-safety-analysis"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=shadow-field],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=shadow-field"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=switch],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=switch"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=thread-safety],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=thread-safety"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=range-loop-analysis],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=range-loop-analysis"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=unused-variable],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=unused-variable"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=date-time],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=date-time"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=return-type],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=return-type"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=conditional-uninitialized],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=conditional-uninitialized"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=unreachable-code-loop-increment],[ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=unreachable-code-loop-increment"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Werror=mismatched-tags], [ERROR_CXXFLAGS="$ERROR_CXXFLAGS -Werror=mismatched-tags"], [], [$CXXFLAG_WERROR])
fi

if test "x$CXXFLAGS_overridden" = "xno"; then
AX_CHECK_COMPILE_FLAG([-Wall],[CXXFLAGS="$CXXFLAGS -Wall"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wextra],[CXXFLAGS="$CXXFLAGS -Wextra"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wformat],[CXXFLAGS="$CXXFLAGS -Wformat"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wgnu],[CXXFLAGS="$CXXFLAGS -Wgnu"],,[[$CXXFLAG_WERROR]])
dnl some compilers will ignore -Wformat-security without -Wformat, so just combine the two here.
AX_CHECK_COMPILE_FLAG([-Wformat -Wformat-security],[CXXFLAGS="$CXXFLAGS -Wformat -Wformat-security"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wvla],[CXXFLAGS="$CXXFLAGS -Wvla"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wformat-security],[CXXFLAGS="$CXXFLAGS -Wformat-security"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wthread-safety-analysis],[CXXFLAGS="$CXXFLAGS -Wthread-safety-analysis"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wshadow-field],[CXXFLAGS="$CXXFLAGS -Wshadow-field"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wswitch],[CXXFLAGS="$CXXFLAGS -Wswitch"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wthread-safety],[CXXFLAGS="$CXXFLAGS -Wthread-safety"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wrange-loop-analysis],[CXXFLAGS="$CXXFLAGS -Wrange-loop-analysis"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wunused-variable],[CXXFLAGS="$CXXFLAGS -Wunused-variable"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wunused-member-function],[CXXFLAGS="$CXXFLAGS -Wunused-member-function"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wdate-time],[CXXFLAGS="$CXXFLAGS -Wdate-time"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wconditional-uninitialized],[CXXFLAGS="$CXXFLAGS -Wconditional-uninitialized"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wsign-compare],[CXXFLAGS="$CXXFLAGS -Wsign-compare"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wduplicated-branches],[CXXFLAGS="$CXXFLAGS -Wduplicated-branches"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wduplicated-cond],[CXXFLAGS="$CXXFLAGS -Wduplicated-cond"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wlogical-op],[CXXFLAGS="$CXXFLAGS -Wlogical-op"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Woverloaded-virtual],[CXXFLAGS="$CXXFLAGS -Woverloaded-virtual"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wunreachable-code-loop-increment],[CXXFLAGS="$CXXFLAGS -Wunreachable-code-loop-increment"],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-Wimplicit-fallthrough], [CXXFLAGS="$CXXFLAGS -Wimplicit-fallthrough"], [], [$CXXFLAG_WERROR])

## Some compilers (gcc) ignore unknown -Wno-* options, but warn about all
## unknown options if any other warning is produced. Test the -Wfoo case, and
Expand All @@ -281,24 +278,26 @@ if test "x$CXXFLAGS_overridden" = "xno"; then
AX_CHECK_COMPILE_FLAG([-Wdeprecated-copy],[CXXFLAGS="$CXXFLAGS -Wno-deprecated-copy"],,[[$CXXFLAG_WERROR]])
fi

enable_hwcrc32=no
enable_sse42=no
enable_sse41=no
enable_avx2=no
enable_shani=no

if test "x$use_asm" = "xyes"; then

# Check for optional instruction set support. Enabling these does _not_ imply that all code will
# be compiled with them, rather that specific objects/libs may use them after checking for runtime
# compatibility.
dnl Check for optional instruction set support. Enabling these does _not_ imply that all code will
dnl be compiled with them, rather that specific objects/libs may use them after checking for runtime
dnl compatibility.

dnl x86
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
AC_MSG_CHECKING(for assembler crc32 support)
AC_MSG_CHECKING(for SSE4.2 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
Expand All @@ -313,7 +312,7 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
l = _mm_crc32_u64(l, 0);
return l;
]])],
[ AC_MSG_RESULT(yes); enable_hwcrc32=yes],
[ AC_MSG_RESULT(yes); enable_sse42=yes],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"
Expand All @@ -323,11 +322,7 @@ CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
AC_MSG_CHECKING(for SSE4.1 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>
#endif
]],[[
__m128i l = _mm_set1_epi32(0);
return _mm_extract_epi32(l, 3);
Expand All @@ -337,6 +332,21 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
AC_MSG_CHECKING(for AVX2 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#include <immintrin.h>
]],[[
__m256i l = _mm256_set1_epi32(0);
return _mm256_extract_epi32(l, 7);
]])],
[ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
AC_MSG_CHECKING(for SHA-NI intrinsics)
Expand All @@ -354,6 +364,24 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

# ARM
AX_CHECK_COMPILE_FLAG([-march=armv8-a+crc+crypto],[[ARM_CRC_CXXFLAGS="-march=armv8-a+crc+crypto"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $ARM_CRC_CXXFLAGS"
AC_MSG_CHECKING(for ARM CRC32 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <arm_acle.h>
#include <arm_neon.h>
]],[[
__crc32cb(0, 0); __crc32ch(0, 0); __crc32cw(0, 0); __crc32cd(0, 0);
vmull_p64(0, 0);
]])],
[ AC_MSG_RESULT(yes); enable_arm_crc=yes; ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

fi

CPPFLAGS="$CPPFLAGS -DBOOST_SPIRIT_THREADSAFE -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
Expand Down Expand Up @@ -1117,10 +1145,11 @@ AM_CONDITIONAL([USE_QRCODE], [test x$use_qr = xyes])
AM_CONDITIONAL([USE_LCOV],[test x$use_lcov = xyes])
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
AM_CONDITIONAL([ENABLE_SSE42],[test x$enable_sse42 = xyes])
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes])
AM_CONDITIONAL([ENABLE_ARM_CRC],[test x$enable_arm_crc = xyes])
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
Expand Down
4 changes: 4 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://opensource.org/licenses/mit-license.php.

# Pattern rule to print variables, e.g. make print-top_srcdir
print-%: FORCE
@echo '$*'='$($*)'

DIST_SUBDIRS = univalue

AM_LDFLAGS = ${libcurl_LIBS} $(PTHREAD_CFLAGS) $(LIBTOOL_LDFLAGS) $(HARDENED_LDFLAGS)
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.leveldb.include
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ leveldb_libmemenv_a_SOURCES += leveldb/helpers/memenv/memenv.h

leveldb_libleveldb_sse42_a_CPPFLAGS = $(leveldb_libleveldb_a_CPPFLAGS)
leveldb_libleveldb_sse42_a_CXXFLAGS = $(leveldb_libleveldb_a_CXXFLAGS)
if ENABLE_HWCRC32
if ENABLE_SSE42
leveldb_libleveldb_sse42_a_CPPFLAGS += -DLEVELDB_PLATFORM_POSIX_SSE
leveldb_libleveldb_sse42_a_CXXFLAGS += $(SSE42_CXXFLAGS)
endif
Expand Down
5 changes: 4 additions & 1 deletion src/crypto/sha256_avx2.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
// Copyright (c) 2017-2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://www.opensource.org/licenses/mit-license.php.

#ifdef ENABLE_AVX2

#include <stdint.h>
#include <immintrin.h>

#include <crypto/sha256.h>
#include <crypto/common.h>

namespace sha256d64_avx2 {
Expand Down
41 changes: 19 additions & 22 deletions src/crypto/sha256_shani.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) 2018 The Bitcoin Core developers
// Copyright (c) 2018-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://opensource.org/licenses/mit-license.php.
// file COPYING or https://www.opensource.org/licenses/mit-license.php.
//
// Based on https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-x86.c,
// Written and placed in public domain by Jeffrey Walton.
Expand All @@ -11,14 +11,11 @@
#include <stdint.h>
#include <immintrin.h>

#include <crypto/common.h>


namespace {

const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);
const __m128i INIT0 = _mm_set_epi64x(0x6a09e667bb67ae85ull, 0x510e527f9b05688cull);
const __m128i INIT1 = _mm_set_epi64x(0x3c6ef372a54ff53aull, 0x1f83d9ab5be0cd19ull);
alignas(__m128i) const uint8_t MASK[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
alignas(__m128i) const uint8_t INIT0[16] = {0x8c, 0x68, 0x05, 0x9b, 0x7f, 0x52, 0x0e, 0x51, 0x85, 0xae, 0x67, 0xbb, 0x67, 0xe6, 0x09, 0x6a};
alignas(__m128i) const uint8_t INIT1[16] = {0x19, 0xcd, 0xe0, 0x5b, 0xab, 0xd9, 0x83, 0x1f, 0x3a, 0xf5, 0x4f, 0xa5, 0x72, 0xf3, 0x6e, 0x3c};

void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
{
Expand Down Expand Up @@ -68,12 +65,12 @@ void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1)

__m128i inline __attribute__((always_inline)) Load(const unsigned char* in)
{
return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), MASK);
return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));
}

void inline __attribute__((always_inline)) Save(unsigned char* out, __m128i s)
{
_mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, MASK));
_mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));
}
}

Expand Down Expand Up @@ -150,8 +147,8 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
__m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1;

/* Transform 1 */
bs0 = as0 = INIT0;
bs1 = as1 = INIT1;
bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
am0 = Load(in);
bm0 = Load(in + 64);
QuadRound(as0, as1, am0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
Expand Down Expand Up @@ -220,10 +217,10 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
ShiftMessageC(bm1, bm2, bm3);
QuadRound(as0, as1, am3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
as0 = _mm_add_epi32(as0, INIT0);
bs0 = _mm_add_epi32(bs0, INIT0);
as1 = _mm_add_epi32(as1, INIT1);
bs1 = _mm_add_epi32(bs1, INIT1);
as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));

/* Transform 2 */
aso0 = as0;
Expand Down Expand Up @@ -276,8 +273,8 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
bm1 = bs1;

/* Transform 3 */
bs0 = as0 = INIT0;
bs1 = as1 = INIT1;
bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
QuadRound(as0, as1, am0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
QuadRound(bs0, bs1, bm0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
QuadRound(as0, as1, am1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
Expand Down Expand Up @@ -340,10 +337,10 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
ShiftMessageC(bm1, bm2, bm3);
QuadRound(as0, as1, am3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
as0 = _mm_add_epi32(as0, INIT0);
bs0 = _mm_add_epi32(bs0, INIT0);
as1 = _mm_add_epi32(as1, INIT1);
bs1 = _mm_add_epi32(bs1, INIT1);
as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));

/* Extract hash into out */
Unshuffle(as0, as1);
Expand Down
5 changes: 4 additions & 1 deletion src/crypto/sha256_sse41.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
// Copyright (c) 2018-2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://www.opensource.org/licenses/mit-license.php.

#ifdef ENABLE_SSE41

#include <stdint.h>
#include <immintrin.h>

#include <crypto/sha256.h>
#include <crypto/common.h>

namespace sha256d64_sse41 {
Expand Down

0 comments on commit d57ca01

Please sign in to comment.