Reviewer note: the line structure of this patch was rebuilt from a
whitespace-collapsed copy. Hunk line counts were checked against every @@
header, but the exact indentation / trailing whitespace of context and removed
lines is assumed (2-space C++ indent, 4-space Makefile continuation); use
`git apply --ignore-whitespace` if a hunk is rejected. The header names in the
`#include` directives and the `<double>` template argument had been stripped
and are restored. Comment typos were fixed in the two new files only, so their
`index` hashes no longer describe the resulting blobs.

diff --git a/api/Makefile.am b/api/Makefile.am
index 195d16c846..e4d6f85659 100644
--- a/api/Makefile.am
+++ b/api/Makefile.am
@@ -28,6 +28,7 @@ libtesseract_api_la_LIBADD = \
     ../wordrec/libtesseract_wordrec.la \
     ../classify/libtesseract_classify.la \
     ../dict/libtesseract_dict.la \
+    ../arch/libtesseract_arch.la \
     ../arch/libtesseract_avx.la \
     ../arch/libtesseract_sse.la \
     ../lstm/libtesseract_lstm.la \
@@ -57,6 +58,7 @@ libtesseract_la_LIBADD = \
     ../wordrec/libtesseract_wordrec.la \
     ../classify/libtesseract_classify.la \
     ../dict/libtesseract_dict.la \
+    ../arch/libtesseract_arch.la \
     ../arch/libtesseract_avx.la \
     ../arch/libtesseract_sse.la \
     ../lstm/libtesseract_lstm.la \
diff --git a/arch/Makefile.am b/arch/Makefile.am
index ba7f1a814d..20c12affa6 100644
--- a/arch/Makefile.am
+++ b/arch/Makefile.am
@@ -1,4 +1,4 @@
-AM_CPPFLAGS += -I$(top_srcdir)/ccutil
+AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer
 AUTOMAKE_OPTIONS = subdir-objects
 SUBDIRS =
 AM_CXXFLAGS =
@@ -8,15 +8,17 @@ AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
 AM_CPPFLAGS += -DTESS_EXPORTS
 endif
 
-include_HEADERS = \
-    dotproductavx.h dotproductsse.h
+include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h
 
-noinst_HEADERS = 
+noinst_HEADERS =
 
 if !USING_MULTIPLELIBS
 noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
+noinst_LTLIBRARIES += libtesseract_arch.la
 else
 lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
+lib_LTLIBRARIES += libtesseract_arch.la
+libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
 libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
 libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
 endif
@@ -28,6 +30,8 @@ if SSE41_OPT
 libtesseract_sse_la_CXXFLAGS = -msse4.1
 endif
 
+libtesseract_arch_la_SOURCES = simddetect.cpp
+
 libtesseract_avx_la_SOURCES = dotproductavx.cpp
 
 libtesseract_sse_la_SOURCES = dotproductsse.cpp
diff --git a/arch/simddetect.cpp b/arch/simddetect.cpp
new file mode 100644
index 0000000000..53e7ccf1db
--- /dev/null
+++ b/arch/simddetect.cpp
@@ -0,0 +1,66 @@
+///////////////////////////////////////////////////////////////////////
+// File:        simddetect.cpp
+// Description: Architecture detector.
+// Author:      Stefan Weil (based on code from Ray Smith)
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "simddetect.h"
+#include "tprintf.h"
+
+#undef X86_BUILD
+#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
+# if !defined(ANDROID_BUILD)
+#  define X86_BUILD 1
+# endif  // !ANDROID_BUILD
+#endif  // x86 target
+
+#if defined(X86_BUILD)
+# if defined(__linux__) || defined(__MINGW32__)
+#  include <cpuid.h>
+# elif defined(_WIN32)
+#  include <intrin.h>
+# endif
+#endif
+
+SIMDDetect SIMDDetect::detector;
+
+// If true, then AVX has been detected.
+bool SIMDDetect::avx_available_;
+// If true, then SSE4.1 has been detected.
+bool SIMDDetect::sse_available_;
+
+// Constructor.
+// Tests the architecture in a system-dependent way to detect AVX, SSE and
+// any other available SIMD equipment.
+SIMDDetect::SIMDDetect() {
+#if defined(X86_BUILD)
+# if defined(__linux__) || defined(__MINGW32__)
+  unsigned int eax, ebx, ecx, edx;
+  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
+    sse_available_ = (ecx & 0x00080000) != 0;
+    avx_available_ = (ecx & 0x10000000) != 0;
+  }
+# elif defined(_WIN32)
+  int cpuInfo[4];
+  __cpuid(cpuInfo, 0);
+  if (cpuInfo[0] >= 1) {
+    __cpuid(cpuInfo, 1);
+    sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
+    avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
+  }
+# endif
+  if (avx_available_) tprintf("Found AVX\n");
+  if (sse_available_) tprintf("Found SSE\n");
+#endif  // X86_BUILD
+}
diff --git a/arch/simddetect.h b/arch/simddetect.h
new file mode 100644
index 0000000000..67ba0483fc
--- /dev/null
+++ b/arch/simddetect.h
@@ -0,0 +1,43 @@
+///////////////////////////////////////////////////////////////////////
+// File:        simddetect.h
+// Description: Architecture detector.
+// Author:      Stefan Weil (based on code from Ray Smith)
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+// Architecture detector. Add code here to detect any other architectures for
+// SIMD-based faster dot product functions. Intended to be a single static
+// object, but it does no real harm to have more than one.
+class SIMDDetect {
+ public:
+  // Returns true if AVX is available on this system.
+  static inline bool IsAVXAvailable() {
+    return detector.avx_available_;
+  }
+  // Returns true if SSE4.1 is available on this system.
+  static inline bool IsSSEAvailable() {
+    return detector.sse_available_;
+  }
+
+ private:
+  // Constructor, must set all static member variables.
+  SIMDDetect();
+
+ private:
+  // Singleton.
+  static SIMDDetect detector;
+  // If true, then AVX has been detected.
+  static bool avx_available_;
+  // If true, then SSE4.1 has been detected.
+  static bool sse_available_;
+};
diff --git a/lstm/weightmatrix.cpp b/lstm/weightmatrix.cpp
index 84dd9b77ab..477de46691 100644
--- a/lstm/weightmatrix.cpp
+++ b/lstm/weightmatrix.cpp
@@ -18,89 +18,14 @@
 
 #include "weightmatrix.h"
 
-#undef X86_BUILD
-#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
-# if !defined(ANDROID_BUILD)
-#  define X86_BUILD 1
-# endif  // !ANDROID_BUILD
-#endif  // x86 target
-
-#if defined(X86_BUILD)
-# if defined(__linux__) || defined(__MINGW32__)
-#  include <cpuid.h>
-# elif defined(_WIN32)
-#  include <intrin.h>
-# endif
-#endif
 #include "dotproductavx.h"
 #include "dotproductsse.h"
+#include "simddetect.h"
 #include "statistc.h"
-#include "svutil.h"
 #include "tprintf.h"
 
 namespace tesseract {
 
-// Architecture detector. Add code here to detect any other architectures for
-// SIMD-based faster dot product functions. Intended to be a single static
-// object, but it does no real harm to have more than one.
-class SIMDDetect {
- public:
-  SIMDDetect()
-      : arch_tested_(false), avx_available_(false), sse_available_(false) {}
-
-  // Returns true if AVX is available on this system.
-  bool IsAVXAvailable() {
-    if (!arch_tested_) TestArchitecture();
-    return avx_available_;
-  }
-  // Returns true if SSE4.1 is available on this system.
-  bool IsSSEAvailable() {
-    if (!arch_tested_) TestArchitecture();
-    return sse_available_;
-  }
-
- private:
-  // Tests the architecture in a system-dependent way to detect AVX, SSE and
-  // any other available SIMD equipment.
-  void TestArchitecture() {
-    SVAutoLock lock(&arch_mutex_);
-    if (!arch_tested_) {
-#if defined(X86_BUILD)
-# if defined(__linux__) || defined(__MINGW32__)
-      unsigned int eax, ebx, ecx, edx;
-      if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
-        sse_available_ = (ecx & 0x00080000) != 0;
-        avx_available_ = (ecx & 0x10000000) != 0;
-      }
-# elif defined(_WIN32)
-      int cpuInfo[4];
-      __cpuid(cpuInfo, 0);
-      if (cpuInfo[0] >= 1) {
-        __cpuid(cpuInfo, 1);
-        sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
-        avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
-      }
-# endif
-      if (avx_available_) tprintf("Found AVX\n");
-      if (sse_available_) tprintf("Found SSE\n");
-#endif  // X86_BUILD
-      arch_tested_ = true;
-    }
-  }
-
- private:
-  // Detect architecture in only a single thread.
-  SVMutex arch_mutex_;
-  // Flag set to true after TestArchitecture has been called.
-  bool arch_tested_;
-  // If true, then AVX has been detected.
-  bool avx_available_;
-  // If true, then SSe4.1 has been detected.
-  bool sse_available_;
-};
-
-static SIMDDetect detector;
-
 // Copies the whole input transposed, converted to double, into *this.
 void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
   int width = input.dim1();
@@ -258,7 +183,7 @@ void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
   for (int i = 0; i < num_out; ++i) {
     const inT8* Wi = wi_[i];
     int total = 0;
-    if (detector.IsSSEAvailable()) {
+    if (SIMDDetect::IsSSEAvailable()) {
       total = IntDotProductSSE(u, Wi, num_in);
     } else {
       for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
@@ -410,8 +335,8 @@ double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
   // is about 8% faster than sse. This suggests that the time is memory
   // bandwidth constrained and could benefit from holding the reused vector
   // in AVX registers.
-  if (detector.IsAVXAvailable()) return DotProductAVX(u, v, n);
-  if (detector.IsSSEAvailable()) return DotProductSSE(u, v, n);
+  if (SIMDDetect::IsAVXAvailable()) return DotProductAVX(u, v, n);
+  if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n);
   double total = 0.0;
   for (int k = 0; k < n; ++k) total += u[k] * v[k];
   return total;