Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make SIMD configure tests work for MacOS multiarch builds #78

Merged
merged 2 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,14 @@ task:
package_install_script:
- brew install autoconf automake libtool

<< : *COMPILE
# Try building and testing a multiarch library
compile_script:
- autoreconf -i
- ./configure CFLAGS="-g -O3 -Wall -Werror -arch arm64 -arch x86_64"
- make -j4

test_script:
- make check

# # ----------
# # Windows MINGW.
Expand Down
101 changes: 54 additions & 47 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -122,94 +122,101 @@ dnl Count parts needed to build rANS_static32x16pr_sse4.c
sse4_prerequisites=""

dnl Check if we can use our SSSE3 implementations of rANS 32x16 codec.
AX_CHECK_COMPILE_FLAG([-mssse3], [
MSSSE3=-mssse3
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MSSSE3])
AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if the compiler can issue SSSE3 instructions.])
], [], [], [AC_LANG_PROGRAM([[
HTS_CHECK_COMPILE_FLAGS_NEEDED([ssse3], [-mssse3], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_shuffle_epi8(a, b);
return *((char *) &c);
]])
#endif
]])], [
MSSSE3="$flags_needed"
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MSSSE3])
AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.])
])

dnl Check if we can use popcnt instructions
AX_CHECK_COMPILE_FLAG([-mpopcnt], [
MPOPCNT=-mpopcnt
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MPOPCNT])
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if the compiler can issue popcnt instructions.])
], [], [], [AC_LANG_PROGRAM([[
HTS_CHECK_COMPILE_FLAGS_NEEDED([popcnt], [-mpopcnt], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
unsigned int i = _mm_popcnt_u32(1);
return i != 1;
]])
#endif
]])], [
MPOPCNT="$flags_needed"
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MPOPCNT])
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
])

dnl Check if we can use our SSE4.1 too. This *may* always imply SSSE3?
dnl It may be easier just to target an old era of cpu than -mssse3 -msse4.1
dnl -mpopcnt. Eg -march=nehalem. I don't know how wide spread that is.
AX_CHECK_COMPILE_FLAG([-msse4.1], [
MSSE4_1=-msse4.1
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MSSE4_1])
AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if the compiler can issue SSE4.1 instructions.])
], [], [], [AC_LANG_PROGRAM([[
HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_max_epu32(a, b);
return *((char *) &c);
]])
#endif
]])], [
MSSE4_1="$flags_needed"
sse4_prerequisites="o$sse4_prerequisites"
AC_SUBST([MSSE4_1])
AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled.])
])
AM_CONDITIONAL([RANS_32x16_SSE4],[test "x$sse4_prerequisites" = "xooo"])

dnl Check if we can use our AVX2 implementations.
AX_CHECK_COMPILE_FLAG([-mavx2], [
MAVX2=-mavx2
AC_SUBST([MAVX2])
AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if the compiler can issue AVX2 instructions.])
], [], [], [AC_LANG_PROGRAM([[
build_rans_avx2=no
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
__m256i b = _mm256_add_epi32(a, a);
long long c = _mm256_extract_epi64(b, 0);
return (int) c;
]])
#endif
]])], [
MAVX2="$flags_needed"
build_rans_avx2=yes
AC_SUBST([MAVX2])
AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.])
])
AM_CONDITIONAL([RANS_32x16_AVX2],[test "x$MAVX2" != "x"])
AM_CONDITIONAL([RANS_32x16_AVX2],[test "$build_rans_avx2" = yes])

dnl Check also if we have AVX512. If so this overrides AVX2
AX_CHECK_COMPILE_FLAG([-mavx512f], [
MAVX512=-mavx512f
AC_SUBST([MAVX512])
AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if the compiler can issue AVX512F instructions.])
], [], [], [AC_LANG_PROGRAM([[
build_rans_avx512=no
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m512i a = _mm512_set1_epi32(1);
__m512i b = _mm512_add_epi32(a, a);
return *((char *) &b);
]])
#endif
]])], [
MAVX512="$flags_needed"
build_rans_avx512=yes
AC_SUBST([MAVX512])
AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.])
])
AM_CONDITIONAL([RANS_32x16_AVX512],[test "x$MAVX512" != "x"])

dnl Detect ARM Neon availability
AC_CACHE_CHECK([whether C compiler supports ARM Neon], [htscodecs_cv_have_neon], [
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
#include "arm_neon.h"
]], [[
int32x4_t a = vdupq_n_s32(1);
int32x4_t b = vaddq_s32(a, a);
return *((char *) &b);
]])], [htscodecs_cv_have_neon=yes], [htscodecs_cv_have_neon=no])])
AM_CONDITIONAL([RANS_32x16_NEON],[test "$htscodecs_cv_have_neon" = yes])
AM_CONDITIONAL([RANS_32x16_AVX512],[test "$build_rans_avx512" = yes])

AC_SUBST([HTSCODECS_SIMD_SRC])

Expand Down
4 changes: 1 addition & 3 deletions htscodecs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ libhtscodecs_base_src = \
rANS_word.h \
rANS_static32x16pr.c \
rANS_static32x16pr.h \
rANS_static32x16pr_neon.c \
rANS_static16_int.h \
permute.h \
tokenise_name3.c \
Expand Down Expand Up @@ -86,9 +87,6 @@ librANS_static32x16pr_avx512_la_SOURCES = rANS_static32x16pr_avx512.c
librANS_static32x16pr_avx512_la_CFLAGS = @MAVX512@
libhtscodecs_la_LIBADD += librANS_static32x16pr_avx512.la
endif
if RANS_32x16_NEON
libhtscodecs_la_SOURCES += rANS_static32x16pr_neon.c
endif

libhtscodecs_la_LDFLAGS = -version-info @VERS_CURRENT@:@VERS_REVISION@:@VERS_AGE@
libhtscodecs_la_LIBADD += -lm
Expand Down
5 changes: 4 additions & 1 deletion htscodecs/rANS_static32x16pr_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

#include "config.h"

#ifdef HAVE_AVX2
#if defined(__x86_64__) && defined(HAVE_AVX2)

#include <stdint.h>
#include <stdlib.h>
Expand Down Expand Up @@ -1644,4 +1644,7 @@ unsigned char *rans_uncompress_O1_32x16_avx2(unsigned char *in,

return NULL;
}
#else // HAVE_AVX2
// Prevent "empty translation unit" errors when building without AVX2
const char *rANS_static32x16pr_avx2_disabled = "No AVX2";
#endif // HAVE_AVX2
6 changes: 4 additions & 2 deletions htscodecs/rANS_static32x16pr_avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

#include "config.h"

#ifdef HAVE_AVX512
#if defined(__x86_64__) && defined(HAVE_AVX512)

#include <stdint.h>
#include <stdlib.h>
Expand Down Expand Up @@ -1066,5 +1066,7 @@ unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in,

return NULL;
}

#else // HAVE_AVX512
// Prevent "empty translation unit" errors when building without AVX512
const char *rANS_static32x16pr_avx512_disabled = "No AVX512";
#endif // HAVE_AVX512
3 changes: 3 additions & 0 deletions htscodecs/rANS_static32x16pr_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -1956,4 +1956,7 @@ unsigned char *rans_uncompress_O1_32x16_neon(unsigned char *in,
}

#undef MAGIC2
#else /* __ARM_NEON */
// Prevent "empty translation unit" errors when building without NEON
const char *rANS_static32x16pr_neon_disabled = "No NEON";
#endif /* __ARM_NEON */
7 changes: 5 additions & 2 deletions htscodecs/rANS_static32x16pr_sse4.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

#include "config.h"

#if defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT)
#if defined(__x86_64__) && \
defined(HAVE_SSE4_1) && defined(HAVE_SSSE3) && defined(HAVE_POPCNT)

#include <stdint.h>
#include <stdlib.h>
Expand Down Expand Up @@ -1767,5 +1768,7 @@ unsigned char *rans_uncompress_O1_32x16_sse4(unsigned char *in,

return NULL;
}

#else // HAVE_SSE4_1 and HAVE_SSSE3
// Prevent "empty translation unit" errors when building without SSE4 etc.
const char *rANS_static32x16pr_sse4_disabled = "No SSE4";
#endif // HAVE_SSE4_1 and HAVE_SSSE3
57 changes: 0 additions & 57 deletions m4/ax_check_compile_flag.m4

This file was deleted.

63 changes: 63 additions & 0 deletions m4/hts_check_compile_flags_needed.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# hts_check_compile_flags_needed.m4
#
# SYNOPSIS
#
# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# DESCRIPTION
#
# Check whether the given FLAGS are required to build and link INPUT with
# the current language's compiler. Compilation and linking are first
# tried without FLAGS. If that fails, it then tries to compile and
# link again with FLAGS.
#
# FEATURE describes the feature being tested, and is used when printing
# messages and to name the cache entry (along with the tested flags).
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure. In ACTION-SUCCESS, $flags_needed will be set to
# either an empty string or FLAGS depending on the test results.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check with FLAGS is done. That check is thus
# made with the flags: "CFLAGS EXTRA-FLAGS FLAGS". This can for example be
# used to force the compiler to issue an error when a bad flag is given.
#
# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably
# isn't very useful.
#
# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <[email protected]>
# Copyright (c) 2011 Maarten Bosmans <[email protected]>
# Copyright (c) 2023 Robert Davies <[email protected]>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.

# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])

AC_DEFUN([HTS_CHECK_COMPILE_FLAGS_NEEDED],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[hts_cv_check_[]_AC_LANG_ABBREV[]flags_needed_$1_$6_$2])dnl
dnl The cache variable name is built from FEATURE, EXTRA-FLAGS and FLAGS.
dnl First try to link INPUT with the language flags as they are and record
dnl "none" if that works. Otherwise save the language flags, append
dnl EXTRA-FLAGS and FLAGS, and try to link again: the recorded result is
dnl then FLAGS on success or "unsupported" on failure. The saved flags are
dnl restored after the second attempt.
AC_CACHE_CHECK([_AC_LANG compiler flags needed for $1], CACHEVAR, [
AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[none])],
[ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $6 $2"
AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[$2])],
[AS_VAR_SET(CACHEVAR,[unsupported])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])])
dnl Dispatch on the recorded result: "unsupported" runs ACTION-FAILURE.
dnl Anything else sets the shell variable flags_needed (empty when the
dnl result is "none" and the recorded flags otherwise) and then runs
dnl ACTION-SUCCESS. A missing action defaults to the shell no-op ":".
AS_VAR_IF(CACHEVAR,unsupported, [
m4_default([$5], :)
], [
AS_VAR_IF(CACHEVAR,none,[flags_needed=""], [flags_needed="$CACHEVAR"])
m4_default([$4], :)
])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl HTS_CHECK_COMPILE_FLAGS_NEEDED