Skip to content

Commit

Permalink
lstm: Update AVX / SSE support
Browse files Browse the repository at this point in the history
* Fix compiler warning (see below)

* Use the Linux code path for Mingw-w64, too

* Simplify conditional code by using X86_BUILD instead of NONX86_BUILD

* Remove unneeded call to __get_cpuid_max (__get_cpuid already calls it internally)

* Remove unneeded #undef statement

gcc report:

lstm/weightmatrix.cpp: In static member function
 'static double tesseract::WeightMatrix::DotProduct(const double*, const double*, int)':
weightmatrix.cpp:67:29: warning:
 'ecx' may be used uninitialized in this function [-Wmaybe-uninitialized]
       avx_available_ = (ecx & 0x10000000) != 0;
                             ^
lstm/weightmatrix.cpp:64:30: note: 'ecx' was declared here
       unsigned int eax, ebx, ecx, edx;
                              ^
Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Dec 27, 2016
1 parent fc94820 commit b2a0262
Showing 1 changed file with 24 additions and 19 deletions.
43 changes: 24 additions & 19 deletions lstm/weightmatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@

#include "weightmatrix.h"

#undef NONX86_BUILD
#if !defined(__x86_64__) && !defined(__i386__) && !defined(_WIN32) || defined(ANDROID_BUILD)
#define NONX86_BUILD 1
#endif

#if defined(__linux__) && !defined(NONX86_BUILD)
#include <cpuid.h>
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
# if !defined(ANDROID_BUILD)
# define X86_BUILD 1
# endif // !ANDROID_BUILD
#endif // x86 target

#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
# include <cpuid.h>
# elif defined(_WIN32)
# endif
#endif
#include "dotproductavx.h"
#include "dotproductsse.h"
Expand Down Expand Up @@ -58,18 +63,20 @@ class SIMDDetect {
// any other available SIMD equipment.
void TestArchitecture() {
SVAutoLock lock(&arch_mutex_);
if (arch_tested_) return;
#if defined(__linux__) && !defined(NONX86_BUILD)
if (__get_cpuid_max(0, NULL) >= 1) {
if (!arch_tested_) {
#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
unsigned int eax, ebx, ecx, edx;
__get_cpuid(1, &eax, &ebx, &ecx, &edx);
sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0;
}
# endif
if (avx_available_) tprintf("Found AVX\n");
if (sse_available_) tprintf("Found SSE\n");
#endif // X86_BUILD
arch_tested_ = true;
}
#endif
if (avx_available_) tprintf("Found AVX\n");
if (sse_available_) tprintf("Found SSE\n");
arch_tested_ = true;
}

private:
Expand Down Expand Up @@ -439,5 +446,3 @@ void WeightMatrix::MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
}

} // namespace tesseract.

#undef NONX86_BUILD

0 comments on commit b2a0262

Please sign in to comment.