Skip to content

Commit

Permalink
lstm: Move class SIMDDetect to new source file and improve code
Browse files Browse the repository at this point in the history
Modify also the code to use a singleton. This simplifies the code as
no locking is needed. It also slightly improves the performance because
no check whether the architecture was tested is needed.

Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Dec 27, 2016
1 parent e949812 commit 19616b0
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 83 deletions.
2 changes: 2 additions & 0 deletions api/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ libtesseract_api_la_LIBADD = \
../wordrec/libtesseract_wordrec.la \
../classify/libtesseract_classify.la \
../dict/libtesseract_dict.la \
../arch/libtesseract_arch.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_sse.la \
../lstm/libtesseract_lstm.la \
Expand Down Expand Up @@ -57,6 +58,7 @@ libtesseract_la_LIBADD = \
../wordrec/libtesseract_wordrec.la \
../classify/libtesseract_classify.la \
../dict/libtesseract_dict.la \
../arch/libtesseract_arch.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_sse.la \
../lstm/libtesseract_lstm.la \
Expand Down
12 changes: 8 additions & 4 deletions arch/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AM_CPPFLAGS += -I$(top_srcdir)/ccutil
AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer
AUTOMAKE_OPTIONS = subdir-objects
SUBDIRS =
AM_CXXFLAGS =
Expand All @@ -8,15 +8,17 @@ AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
AM_CPPFLAGS += -DTESS_EXPORTS
endif

include_HEADERS = \
dotproductavx.h dotproductsse.h
include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h

noinst_HEADERS =
noinst_HEADERS =

if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
noinst_LTLIBRARIES += libtesseract_arch.la
else
lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
lib_LTLIBRARIES += libtesseract_arch.la
libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
endif
Expand All @@ -28,6 +30,8 @@ if SSE41_OPT
libtesseract_sse_la_CXXFLAGS = -msse4.1
endif

libtesseract_arch_la_SOURCES = simddetect.cpp

libtesseract_avx_la_SOURCES = dotproductavx.cpp

libtesseract_sse_la_SOURCES = dotproductsse.cpp
Expand Down
66 changes: 66 additions & 0 deletions arch/simddetect.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.cpp
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include "simddetect.h"
#include "tprintf.h"

// X86_BUILD is (re)defined here: it ends up defined only when compiling for
// an x86 or Windows target that is not an Android build. It gates the CPUID
// based detection code further down in this file.
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
# if !defined(ANDROID_BUILD)
# define X86_BUILD 1
# endif // !ANDROID_BUILD
#endif // x86 target

// Include the compiler-specific header for the CPUID helper used by the
// constructor: <cpuid.h> for __get_cpuid on Linux/MinGW, otherwise
// <intrin.h> for __cpuid on other Windows compilers.
#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
# include <cpuid.h>
# elif defined(_WIN32)
# include <intrin.h>
# endif
#endif

// The singleton instance. Its constructor runs the detection and stores the
// results in the static flags defined below.
SIMDDetect SIMDDetect::detector;

// If true, then AVX has been detected.
bool SIMDDetect::avx_available_;
// If true, then SSE4.1 has been detected.
bool SIMDDetect::sse_available_;

// Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment, and stores the result in the static
// flags sse_available_ and avx_available_.
//
// SSE4.1 is taken directly from CPUID leaf 1 (ECX bit 19).
// AVX is only reported when all of the following hold:
//   * CPUID leaf 1 ECX bit 28 (AVX) is set: the CPU implements AVX,
//   * CPUID leaf 1 ECX bit 27 (OSXSAVE) is set: XGETBV may be executed,
//   * XCR0 bits 1 and 2 are set: the OS saves/restores XMM and YMM state.
// The CPUID AVX bit alone is not sufficient, because executing AVX
// instructions faults when the operating system has not enabled YMM state.
//
// On targets not covered by the conditionals below the flags are not written
// by this constructor.
SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
    sse_available_ = (ecx & 0x00080000) != 0;
    // Require both OSXSAVE (bit 27) and AVX (bit 28) before touching XCR0.
    if ((ecx & 0x18000000) == 0x18000000) {
      // Read the low 32 bits of XCR0 (register index 0 in ECX).
      unsigned int xcr0;
      __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx");
      avx_available_ = (xcr0 & 0x6) == 0x6;
    }
  }
# elif defined(_WIN32)
  int cpuInfo[4];
  // Leaf 0 returns the highest supported standard leaf in EAX.
  __cpuid(cpuInfo, 0);
  if (cpuInfo[0] >= 1) {
    __cpuid(cpuInfo, 1);
    sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
    // Require both OSXSAVE (bit 27) and AVX (bit 28) before touching XCR0.
    if ((cpuInfo[2] & 0x18000000) == 0x18000000) {
      avx_available_ = (_xgetbv(0) & 0x6) == 0x6;
    }
  }
# endif
  if (avx_available_) tprintf("Found AVX\n");
  if (sse_available_) tprintf("Found SSE\n");
#endif  // X86_BUILD
}
43 changes: 43 additions & 0 deletions arch/simddetect.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.h
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. All results are kept in static
// members that are filled in by the single private instance `detector`;
// because the constructor is private, no other instance can be created from
// outside the class.
class SIMDDetect {
 public:
  // Reports whether AVX was detected on this system.
  static bool IsAVXAvailable() { return detector.avx_available_; }

  // Reports whether SSE4.1 was detected on this system.
  static bool IsSSEAvailable() { return detector.sse_available_; }

 private:
  // Runs the detection. Must set all static member variables.
  SIMDDetect();

  // The singleton instance.
  static SIMDDetect detector;
  // Set when AVX has been detected.
  static bool avx_available_;
  // Set when SSE4.1 has been detected.
  static bool sse_available_;
};
83 changes: 4 additions & 79 deletions lstm/weightmatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,89 +18,14 @@

#include "weightmatrix.h"

// X86_BUILD is (re)defined here: it ends up defined only when compiling for
// an x86 or Windows target that is not an Android build. It gates the CPUID
// based detection code in SIMDDetect::TestArchitecture below.
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
# if !defined(ANDROID_BUILD)
# define X86_BUILD 1
# endif // !ANDROID_BUILD
#endif // x86 target

// Include the compiler-specific header for the CPUID helper:
// <cpuid.h> for __get_cpuid on Linux/MinGW, otherwise <intrin.h> for __cpuid
// on other Windows compilers.
#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
# include <cpuid.h>
# elif defined(_WIN32)
# include <intrin.h>
# endif
#endif
#include "dotproductavx.h"
#include "dotproductsse.h"
#include "simddetect.h"
#include "statistc.h"
#include "svutil.h"
#include "tprintf.h"

namespace tesseract {

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
// Detection is lazy: the first call to IsAVXAvailable() or IsSSEAvailable()
// runs TestArchitecture(), which caches the result in the member flags.
class SIMDDetect {
public:
// Starts with nothing tested and no SIMD support assumed.
SIMDDetect()
: arch_tested_(false), avx_available_(false), sse_available_(false) {}

// Returns true if AVX is available on this system.
// NOTE(review): arch_tested_ is read here without holding arch_mutex_; the
// mutex is only taken inside TestArchitecture().
bool IsAVXAvailable() {
if (!arch_tested_) TestArchitecture();
return avx_available_;
}
// Returns true if SSE4.1 is available on this system.
// NOTE(review): same unlocked read of arch_tested_ as in IsAVXAvailable().
bool IsSSEAvailable() {
if (!arch_tested_) TestArchitecture();
return sse_available_;
}

private:
// Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment.
// Takes arch_mutex_ and re-checks arch_tested_ under the lock, so the
// detection body runs only once per object. arch_tested_ is set last, after
// the availability flags have been written.
void TestArchitecture() {
SVAutoLock lock(&arch_mutex_);
if (!arch_tested_) {
#if defined(X86_BUILD)
# if defined(__linux__) || defined(__MINGW32__)
unsigned int eax, ebx, ecx, edx;
// CPUID leaf 1: feature flags are tested in ECX.
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0;
}
# elif defined(_WIN32)
int cpuInfo[4];
// Query leaf 0 first and only read leaf 1 if cpuInfo[0] reports it.
__cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
__cpuid(cpuInfo, 1);
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
}
# endif
if (avx_available_) tprintf("Found AVX\n");
if (sse_available_) tprintf("Found SSE\n");
#endif // X86_BUILD
arch_tested_ = true;
}
}

private:
// Detect architecture in only a single thread.
SVMutex arch_mutex_;
// Flag set to true after TestArchitecture has been called.
bool arch_tested_;
// If true, then AVX has been detected.
bool avx_available_;
// If true, then SSE4.1 has been detected.
bool sse_available_;
};

// File-local detector instance used by the dot product code in this file.
static SIMDDetect detector;

// Copies the whole input transposed, converted to double, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
int width = input.dim1();
Expand Down Expand Up @@ -258,7 +183,7 @@ void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
for (int i = 0; i < num_out; ++i) {
const inT8* Wi = wi_[i];
int total = 0;
if (detector.IsSSEAvailable()) {
if (SIMDDetect::IsSSEAvailable()) {
total = IntDotProductSSE(u, Wi, num_in);
} else {
for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
Expand Down Expand Up @@ -410,8 +335,8 @@ double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
// is about 8% faster than sse. This suggests that the time is memory
// bandwidth constrained and could benefit from holding the reused vector
// in AVX registers.
if (detector.IsAVXAvailable()) return DotProductAVX(u, v, n);
if (detector.IsSSEAvailable()) return DotProductSSE(u, v, n);
if (SIMDDetect::IsAVXAvailable()) return DotProductAVX(u, v, n);
if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n);
double total = 0.0;
for (int k = 0; k < n; ++k) total += u[k] * v[k];
return total;
Expand Down

0 comments on commit 19616b0

Please sign in to comment.