diff --git a/gcore/gdalsse_priv.h b/gcore/gdalsse_priv.h
index 18406dd42239..7005ae7671b5 100644
--- a/gcore/gdalsse_priv.h
+++ b/gcore/gdalsse_priv.h
@@ -39,13 +39,18 @@
 #if (defined(__x86_64) || defined(_M_X64) || defined(USE_SSE2)) && \
     !defined(USE_SSE2_EMULATION)
 
+#include <string.h>
+
+#ifdef USE_NEON_OPTIMIZATIONS
+#include "include_sse2neon.h"
+#else
 /* Requires SSE2 */
 #include <emmintrin.h>
-#include <string.h>
 
 #ifdef __SSE4_1__
 #include <smmintrin.h>
 #endif
+#endif
 
 #include "gdal_priv_templates.hpp"
 
diff --git a/gcore/overview.cpp b/gcore/overview.cpp
index 130d569d904f..ba1b6c6d40d8 100644
--- a/gcore/overview.cpp
+++ b/gcore/overview.cpp
@@ -52,9 +52,15 @@
 #include "gdal_thread_pool.h"
 #include "gdalwarper.h"
 
+#ifdef USE_NEON_OPTIMIZATIONS
+#include "include_sse2neon.h"
+#define USE_SSE2
+
+#include "gdalsse_priv.h"
+
 // Restrict to 64bit processors because they are guaranteed to have SSE2,
 // or if __AVX2__ is defined.
-#if defined(__x86_64) || defined(_M_X64) || defined(__AVX2__)
+#elif defined(__x86_64) || defined(_M_X64) || defined(__AVX2__)
 #define USE_SSE2
 
 #include "gdalsse_priv.h"
@@ -320,7 +326,7 @@ inline GUInt16 ComputeIntegerRMS_4values(double sumSquares)
 /* QuadraticMeanByteSSE2OrAVX2() */
 /************************************************************************/
 
-#ifdef __SSE4_1__
+#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
 #define sse2_packus_epi32 _mm_packus_epi32
 #else
 inline __m128i sse2_packus_epi32(__m128i a, __m128i b)
@@ -335,7 +341,7 @@ inline __m128i sse2_packus_epi32(__m128i a, __m128i b)
 }
 #endif
 
-#ifdef __SSSE3__
+#if defined(__SSSE3__) || defined(USE_NEON_OPTIMIZATIONS)
 #define sse2_hadd_epi16 _mm_hadd_epi16
 #else
 inline __m128i sse2_hadd_epi16(__m128i a, __m128i b)