From 81db6f1f898da33fdb39e1f71fe8a521d9a330c0 Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Tue, 13 Jul 2021 22:30:56 +0200
Subject: [PATCH] Implement fast float dotproduct for SSE IntSimdMatrix

Signed-off-by: Stefan Weil
---
Reviewer notes (free-form text after the three-dash marker; not part of the
commit message and not part of either hunk):

* Makefile.am: inside the existing "if HAVE_SSE4_1" section, a new
  "if OPENMP_SIMD" conditional appends "-fopenmp-simd -DOPENMP_SIMD" to
  libtesseract_sse_la_CXXFLAGS.
* src/arch/intsimdmatrixsse.cpp: in the "#elif defined(FAST_FLOAT)" branch,
  IntSimdMatrix::intSimdMatrixSSE is no longer nullptr; it now points at a
  file-local table "simdMatrix" whose first entry is the new
  matrixDotVector().
* matrixDotVector(dim1, dim2, wi, scales, u, v): for each of the dim1 rows it
  accumulates wi[j] * u[j] over the first dim2 - 1 entries in an int, adds
  wi[dim2 - 1] * INT8_MAX (the row's last entry, which the inline comment
  calls the bias), multiplies by scales[i] and stores the result in v[i];
  wi then advances by dim2 to the next row. The function performs no checks
  on dim1, dim2 or the pointers.
* simdMatrix: the four entries after the function pointer are all 1; the
  inline comments label them as outputs per register, maximum output
  registers, 8 bit inputs per register and inputs per weight group.
* NOTE(review): "#pragma omp simd reduction(+:total)" is emitted
  unconditionally, and the OPENMP_SIMD macro defined by the Makefile hunk is
  not tested anywhere in this patch. Presumably a build without
  -fopenmp-simd ignores the pragma (possibly with an unknown-pragma
  warning) - confirm, or guard it with "#if defined(OPENMP_SIMD)".
* NOTE(review): "//#pragma omp simd collapse(2)" is commented-out code;
  consider dropping it or explaining why it is kept.
* NOTE(review): this copy of the patch arrived with its line breaks and runs
  of whitespace collapsed. The line breaks and the indentation of the added
  lines below are reconstructed. The second hunk header (-23,7) implies one
  more context line than is visible here, presumably a blank line whose
  position was lost. Regenerate the patch from the commit before applying.

 Makefile.am                   |  3 +++
 src/arch/intsimdmatrixsse.cpp | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index afb635b67f..c5e7b80bfb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -183,6 +183,9 @@ endif
 if HAVE_SSE4_1
 libtesseract_sse_la_CXXFLAGS = -msse4.1
 libtesseract_sse_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
+if OPENMP_SIMD
+libtesseract_sse_la_CXXFLAGS += -fopenmp-simd -DOPENMP_SIMD
+endif
 libtesseract_sse_la_SOURCES = src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
 libtesseract_la_LIBADD += libtesseract_sse.la
 noinst_LTLIBRARIES += libtesseract_sse.la
diff --git a/src/arch/intsimdmatrixsse.cpp b/src/arch/intsimdmatrixsse.cpp
index 7407f6f5a1..a46b319fd2 100644
--- a/src/arch/intsimdmatrixsse.cpp
+++ b/src/arch/intsimdmatrixsse.cpp
@@ -23,7 +23,36 @@
 # endif
 #elif defined(FAST_FLOAT)
 namespace tesseract {
-const IntSimdMatrix *IntSimdMatrix::intSimdMatrixSSE = nullptr;
+static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const float *scales,
+                            const int8_t *u, float *v) {
+  const int num_out = dim1;
+  const int num_in = dim2 - 1;
+//#pragma omp simd collapse(2)
+  for (int i = 0; i < num_out; ++i) {
+    int total = 0;
+#pragma omp simd reduction(+:total)
+    for (int j = 0; j < num_in; ++j) {
+      total += wi[j] * u[j];
+    }
+    // Add in the bias and correct for integer values.
+    v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
+    wi += dim2;
+  }
+}
+
+static const IntSimdMatrix simdMatrix = {
+  matrixDotVector,
+  // Number of 32 bit outputs held in each register.
+  1,
+  // Maximum number of registers that we will use to hold outputs.
+  1,
+  // Number of 8 bit inputs in the inputs register.
+  1,
+  // Number of inputs in each weight group.
+  1
+};
+
+const IntSimdMatrix *IntSimdMatrix::intSimdMatrixSSE = &simdMatrix;
 }
 #else