diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake
index 3cd5e1be94b1..764e4b19d830 100644
--- a/3rdparty/ffmpeg/ffmpeg.cmake
+++ b/3rdparty/ffmpeg/ffmpeg.cmake
@@ -1,9 +1,9 @@
-# Binaries branch name: ffmpeg/master_20210303
-# Binaries were created for OpenCV: 7ac6abe02a33bef445a5b77214ad31964e2c5cc1
-ocv_update(FFMPEG_BINARIES_COMMIT "629590c3ba09fb0c8eaa9ab858ff13d3a84ca1aa")
-ocv_update(FFMPEG_FILE_HASH_BIN32 "638065d5a0dab8a828879942375dcac4")
-ocv_update(FFMPEG_FILE_HASH_BIN64 "7f10ae2e6a080ba3714f7a38ee03ae15")
-ocv_update(FFMPEG_FILE_HASH_CMAKE "f8e65dbe4a3b4eedc0d2997e07c3f3fd")
+# Binaries branch name: ffmpeg/master_20210608
+# Binaries were created for OpenCV: eaa9228a4fdfb9c2465aea65a50ce2d16b55dce0
+ocv_update(FFMPEG_BINARIES_COMMIT "213fcd5d4897319a83207406036c4a5957fba010")
+ocv_update(FFMPEG_FILE_HASH_BIN32 "bab661341c30862fa88627130219c0a5")
+ocv_update(FFMPEG_FILE_HASH_BIN64 "ac99f9767a83103c31709628af685924")
+ocv_update(FFMPEG_FILE_HASH_CMAKE "8862c87496e2e8c375965e1277dee1c7")
function(download_win_ffmpeg script_var)
set(${script_var} "" PARENT_SCOPE)
diff --git a/3rdparty/libjpeg-turbo/CMakeLists.txt b/3rdparty/libjpeg-turbo/CMakeLists.txt
index 901669a4a8c3..3c7f29b08e95 100644
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@@ -3,10 +3,10 @@ project(${JPEG_LIBRARY} C)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough)
set(VERSION_MAJOR 2)
-set(VERSION_MINOR 0)
-set(VERSION_REVISION 6)
+set(VERSION_MINOR 1)
+set(VERSION_REVISION 0)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)
+set(LIBJPEG_TURBO_VERSION_NUMBER 2001000)
string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -46,7 +46,6 @@ if(UNIX)
ocv_update(HAVE_UNSIGNED_SHORT 1)
# undef INCOMPLETE_TYPES_BROKEN
ocv_update(RIGHT_SHIFT_IS_UNSIGNED 0)
- ocv_update(__CHAR_UNSIGNED__ 0)
endif()
diff --git a/3rdparty/libjpeg-turbo/LICENSE.md b/3rdparty/libjpeg-turbo/LICENSE.md
index 99c9aadcc47c..a1cdad52faf4 100644
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@@ -91,7 +91,7 @@ best of our understanding.
The Modified (3-clause) BSD License
===================================
-Copyright (C)2009-2020 D. R. Commander. All Rights Reserved.
+Copyright (C)2009-2021 D. R. Commander. All Rights Reserved.
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/3rdparty/libjpeg-turbo/README.ijg b/3rdparty/libjpeg-turbo/README.ijg
index d681cf1273d4..9453c195010f 100644
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or
fitness for a particular purpose. This software is provided "AS IS", and you,
its user, assume the entire risk as to its quality and accuracy.
-This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
All Rights Reserved except as specified below.
Permission is hereby granted to use, copy, modify, and distribute this
@@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are
assumed by the product vendor.
-The IJG distribution formerly included code to read and write GIF files.
-To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
-support has been removed altogether, and the GIF writer has been simplified
-to produce "uncompressed GIFs". This technique does not use the LZW
-algorithm; the resulting GIF files are larger than usual, but are readable
-by all standard GIF decoders.
-
-We are required to state that
- "The Graphics Interchange Format(c) is the Copyright property of
- CompuServe Incorporated. GIF(sm) is a Service Mark property of
- CompuServe Incorporated."
-
-
REFERENCES
==========
diff --git a/3rdparty/libjpeg-turbo/README.md b/3rdparty/libjpeg-turbo/README.md
index 90a4a43ee1de..01e391ea7c08 100644
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@@ -3,7 +3,7 @@ Background
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
-MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm
systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
all else being equal. On other types of systems, libjpeg-turbo can still
outperform libjpeg by a significant amount, by virtue of its highly-optimized
diff --git a/3rdparty/libjpeg-turbo/jconfig.h.in b/3rdparty/libjpeg-turbo/jconfig.h.in
index 18a69a48142a..d4284d97b812 100644
--- a/3rdparty/libjpeg-turbo/jconfig.h.in
+++ b/3rdparty/libjpeg-turbo/jconfig.h.in
@@ -61,11 +61,6 @@
unsigned. */
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
-/* Define to 1 if type `char' is unsigned and you are not using gcc. */
-#ifndef __CHAR_UNSIGNED__
- #cmakedefine __CHAR_UNSIGNED__ 1
-#endif
-
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
diff --git a/3rdparty/libjpeg-turbo/jconfig.h.win.in b/3rdparty/libjpeg-turbo/jconfig.h.win.in
index 6db0b345b2df..13cceef01d13 100644
--- a/3rdparty/libjpeg-turbo/jconfig.h.win.in
+++ b/3rdparty/libjpeg-turbo/jconfig.h.win.in
@@ -18,7 +18,6 @@
#define HAVE_UNSIGNED_SHORT
#undef INCOMPLETE_TYPES_BROKEN
#undef RIGHT_SHIFT_IS_UNSIGNED
-#undef __CHAR_UNSIGNED__
/* Define "boolean" as unsigned char, not int, per Windows custom */
#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */
diff --git a/3rdparty/libjpeg-turbo/src/jccolext.c b/3rdparty/libjpeg-turbo/src/jccolext.c
index 19c955c9d6af..303b322ce674 100644
--- a/3rdparty/libjpeg-turbo/src/jccolext.c
+++ b/3rdparty/libjpeg-turbo/src/jccolext.c
@@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr2 = output_buf[2][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
- r = GETJSAMPLE(inptr[RGB_RED]);
- g = GETJSAMPLE(inptr[RGB_GREEN]);
- b = GETJSAMPLE(inptr[RGB_BLUE]);
+ r = inptr[RGB_RED];
+ g = inptr[RGB_GREEN];
+ b = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
* must be too; we do not need an explicit range-limiting operation.
@@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[0][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
- r = GETJSAMPLE(inptr[RGB_RED]);
- g = GETJSAMPLE(inptr[RGB_GREEN]);
- b = GETJSAMPLE(inptr[RGB_BLUE]);
+ r = inptr[RGB_RED];
+ g = inptr[RGB_GREEN];
+ b = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
/* Y */
outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
@@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr2 = output_buf[2][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
- outptr0[col] = GETJSAMPLE(inptr[RGB_RED]);
- outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]);
- outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]);
+ outptr0[col] = inptr[RGB_RED];
+ outptr1[col] = inptr[RGB_GREEN];
+ outptr2[col] = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
}
}
diff --git a/3rdparty/libjpeg-turbo/src/jccolor.c b/3rdparty/libjpeg-turbo/src/jccolor.c
index 036f6016d18c..bdc563c723ca 100644
--- a/3rdparty/libjpeg-turbo/src/jccolor.c
+++ b/3rdparty/libjpeg-turbo/src/jccolor.c
@@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr3 = output_buf[3][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
- r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
- g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
- b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+ r = MAXJSAMPLE - inptr[0];
+ g = MAXJSAMPLE - inptr[1];
+ b = MAXJSAMPLE - inptr[2];
/* K passes through as-is */
- outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */
+ outptr3[col] = inptr[3];
inptr += 4;
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
* must be too; we do not need an explicit range-limiting operation.
@@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[0][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
- outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */
+ outptr[col] = inptr[0];
inptr += instride;
}
}
@@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
inptr = *input_buf;
outptr = output_buf[ci][output_row];
for (col = 0; col < num_cols; col++) {
- outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+ outptr[col] = inptr[ci];
inptr += nc;
}
}
diff --git a/3rdparty/libjpeg-turbo/src/jcdctmgr.c b/3rdparty/libjpeg-turbo/src/jcdctmgr.c
index c04058e6cec9..7dae17a6e149 100644
--- a/3rdparty/libjpeg-turbo/src/jcdctmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jcdctmgr.c
@@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
- *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+ *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
}
#endif
}
@@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
- *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
- *workspaceptr++ = (FAST_FLOAT)
- (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
}
#endif
}
diff --git a/3rdparty/libjpeg-turbo/src/jchuff.c b/3rdparty/libjpeg-turbo/src/jchuff.c
index db85ce114f8b..2bce767ebd70 100644
--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@@ -4,8 +4,10 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
+ * Copyright (C) 2018, Matthias Räncker.
+ * Copyright (C) 2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -42,15 +44,19 @@
* flags (this defines __thumb__).
*/
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+ defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
#ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
+#else
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
+#endif
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
@@ -65,31 +71,42 @@
* but must not be updated permanently until we complete the MCU.
*/
-typedef struct {
- size_t put_buffer; /* current bit-accumulation buffer */
- int put_bits; /* # of bits now in it */
- int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-} savable_state;
+#if defined(__x86_64__) && defined(__ILP32__)
+typedef unsigned long long bit_buf_type;
+#else
+typedef size_t bit_buf_type;
+#endif
-/* This macro is to work around compilers with missing or broken
- * structure assignment. You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
+/* NOTE: The more optimal Huffman encoding algorithm is only used by the
+ * intrinsics implementation of the Arm Neon SIMD extensions, which is why we
+ * retain the old Huffman encoder behavior when using the GAS implementation.
*/
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src) ((dest) = (src))
+#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
+ defined(_M_ARM) || defined(_M_ARM64))
+typedef unsigned long long simd_bit_buf_type;
#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
- ((dest).put_buffer = (src).put_buffer, \
- (dest).put_bits = (src).put_bits, \
- (dest).last_dc_val[0] = (src).last_dc_val[0], \
- (dest).last_dc_val[1] = (src).last_dc_val[1], \
- (dest).last_dc_val[2] = (src).last_dc_val[2], \
- (dest).last_dc_val[3] = (src).last_dc_val[3])
+typedef bit_buf_type simd_bit_buf_type;
#endif
+
+#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \
+ (defined(__x86_64__) && defined(__ILP32__))
+#define BIT_BUF_SIZE 64
+#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)
+#define BIT_BUF_SIZE 32
+#else
+#error Cannot determine word size
#endif
+#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8)
+typedef struct {
+ union {
+ bit_buf_type c;
+ simd_bit_buf_type simd;
+ } put_buffer; /* current bit accumulation buffer */
+ int free_bits; /* # of bits available in it */
+ /* (Neon GAS: # of bits now in it) */
+ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
typedef struct {
struct jpeg_entropy_encoder pub; /* public fields */
@@ -123,6 +140,7 @@ typedef struct {
size_t free_in_buffer; /* # of byte spaces remaining in buffer */
savable_state cur; /* Current bit buffer & DC state */
j_compress_ptr cinfo; /* dump_buffer needs access to this */
+ int simd;
} working_state;
@@ -201,8 +219,17 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
}
/* Initialize bit buffer to empty */
- entropy->saved.put_buffer = 0;
- entropy->saved.put_bits = 0;
+ if (entropy->simd) {
+ entropy->saved.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+ entropy->saved.free_bits = 0;
+#else
+ entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+ } else {
+ entropy->saved.put_buffer.c = 0;
+ entropy->saved.free_bits = BIT_BUF_SIZE;
+ }
/* Initialize restart stuff */
entropy->restarts_to_go = cinfo->restart_interval;
@@ -287,6 +314,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
* this lets us detect duplicate VAL entries here, and later
* allows emit_bits to detect any attempt to emit such symbols.
*/
+ MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
/* This is also a convenient place to check for out-of-range
@@ -334,94 +362,94 @@ dump_buffer(working_state *state)
/* Outputting bits to the file */
-/* These macros perform the same task as the emit_bits() function in the
- * original libjpeg code. In addition to reducing overhead by explicitly
- * inlining the code, additional performance is achieved by taking into
- * account the size of the bit buffer and waiting until it is almost full
- * before emptying it. This mostly benefits 64-bit platforms, since 6
- * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
+/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be
+ * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
+ * byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and
+ * the speculative 0 byte will be overwritten by the next byte.
*/
-
-#define EMIT_BYTE() { \
- JOCTET c; \
- put_bits -= 8; \
- c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
- *buffer++ = c; \
- if (c == 0xFF) /* need to stuff a zero byte? */ \
- *buffer++ = 0; \
+#define EMIT_BYTE(b) { \
+ buffer[0] = (JOCTET)(b); \
+ buffer[1] = 0; \
+ buffer -= -2 + ((JOCTET)(b) < 0xFF); \
}
-#define PUT_BITS(code, size) { \
- put_bits += size; \
- put_buffer = (put_buffer << size) | code; \
-}
-
-#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
-
-#define CHECKBUF15() { \
- if (put_bits > 15) { \
- EMIT_BYTE() \
- EMIT_BYTE() \
+/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write
+ * directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to
+ * encode 0xFF as 0xFF 0x00.
+ */
+#if BIT_BUF_SIZE == 64
+
+#define FLUSH() { \
+ if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
+ EMIT_BYTE(put_buffer >> 56) \
+ EMIT_BYTE(put_buffer >> 48) \
+ EMIT_BYTE(put_buffer >> 40) \
+ EMIT_BYTE(put_buffer >> 32) \
+ EMIT_BYTE(put_buffer >> 24) \
+ EMIT_BYTE(put_buffer >> 16) \
+ EMIT_BYTE(put_buffer >> 8) \
+ EMIT_BYTE(put_buffer ) \
+ } else { \
+ buffer[0] = (JOCTET)(put_buffer >> 56); \
+ buffer[1] = (JOCTET)(put_buffer >> 48); \
+ buffer[2] = (JOCTET)(put_buffer >> 40); \
+ buffer[3] = (JOCTET)(put_buffer >> 32); \
+ buffer[4] = (JOCTET)(put_buffer >> 24); \
+ buffer[5] = (JOCTET)(put_buffer >> 16); \
+ buffer[6] = (JOCTET)(put_buffer >> 8); \
+ buffer[7] = (JOCTET)(put_buffer); \
+ buffer += 8; \
} \
}
-#endif
-
-#define CHECKBUF31() { \
- if (put_bits > 31) { \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
- } \
-}
+#else
-#define CHECKBUF47() { \
- if (put_bits > 47) { \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
- EMIT_BYTE() \
+#define FLUSH() { \
+ if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
+ EMIT_BYTE(put_buffer >> 24) \
+ EMIT_BYTE(put_buffer >> 16) \
+ EMIT_BYTE(put_buffer >> 8) \
+ EMIT_BYTE(put_buffer ) \
+ } else { \
+ buffer[0] = (JOCTET)(put_buffer >> 24); \
+ buffer[1] = (JOCTET)(put_buffer >> 16); \
+ buffer[2] = (JOCTET)(put_buffer >> 8); \
+ buffer[3] = (JOCTET)(put_buffer); \
+ buffer += 4; \
} \
}
-#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
-#error Cannot determine word size
#endif
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
-
-#define EMIT_BITS(code, size) { \
- CHECKBUF47() \
- PUT_BITS(code, size) \
-}
-
-#define EMIT_CODE(code, size) { \
- temp2 &= (((JLONG)1) << nbits) - 1; \
- CHECKBUF31() \
- PUT_BITS(code, size) \
- PUT_BITS(temp2, nbits) \
+/* Fill the bit buffer to capacity with the leading bits from code, then output
+ * the bit buffer and put the remaining bits from code into the bit buffer.
+ */
+#define PUT_AND_FLUSH(code, size) { \
+ put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \
+ FLUSH() \
+ free_bits += BIT_BUF_SIZE; \
+ put_buffer = code; \
}
-#else
-
-#define EMIT_BITS(code, size) { \
- PUT_BITS(code, size) \
- CHECKBUF15() \
+/* Insert code into the bit buffer and output the bit buffer if needed.
+ * NOTE: We can't flush with free_bits == 0, since the left shift in
+ * PUT_AND_FLUSH() would have undefined behavior.
+ */
+#define PUT_BITS(code, size) { \
+ free_bits -= size; \
+ if (free_bits < 0) \
+ PUT_AND_FLUSH(code, size) \
+ else \
+ put_buffer = (put_buffer << size) | code; \
}
-#define EMIT_CODE(code, size) { \
- temp2 &= (((JLONG)1) << nbits) - 1; \
- PUT_BITS(code, size) \
- CHECKBUF15() \
- PUT_BITS(temp2, nbits) \
- CHECKBUF15() \
+#define PUT_CODE(code, size) { \
+ temp &= (((JLONG)1) << nbits) - 1; \
+ temp |= code << nbits; \
+ nbits += size; \
+ PUT_BITS(temp, nbits) \
}
-#endif
-
/* Although it is exceedingly rare, it is possible for a Huffman-encoded
* coefficient block to be larger than the 128-byte unencoded block. For each
@@ -444,6 +472,7 @@ dump_buffer(working_state *state)
#define STORE_BUFFER() { \
if (localbuf) { \
+ size_t bytes, bytestocopy; \
bytes = buffer - _buffer; \
buffer = _buffer; \
while (bytes > 0) { \
@@ -466,20 +495,46 @@ dump_buffer(working_state *state)
LOCAL(boolean)
flush_bits(working_state *state)
{
- JOCTET _buffer[BUFSIZE], *buffer;
- size_t put_buffer; int put_bits;
- size_t bytes, bytestocopy; int localbuf = 0;
+ JOCTET _buffer[BUFSIZE], *buffer, temp;
+ simd_bit_buf_type put_buffer; int put_bits;
+ int localbuf = 0;
+
+ if (state->simd) {
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+ put_bits = state->cur.free_bits;
+#else
+ put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
+#endif
+ put_buffer = state->cur.put_buffer.simd;
+ } else {
+ put_bits = BIT_BUF_SIZE - state->cur.free_bits;
+ put_buffer = state->cur.put_buffer.c;
+ }
- put_buffer = state->cur.put_buffer;
- put_bits = state->cur.put_bits;
LOAD_BUFFER()
- /* fill any partial byte with ones */
- PUT_BITS(0x7F, 7)
- while (put_bits >= 8) EMIT_BYTE()
+ while (put_bits >= 8) {
+ put_bits -= 8;
+ temp = (JOCTET)(put_buffer >> put_bits);
+ EMIT_BYTE(temp)
+ }
+ if (put_bits) {
+ /* fill partial byte with ones */
+ temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
+ EMIT_BYTE(temp)
+ }
- state->cur.put_buffer = 0; /* and reset bit-buffer to empty */
- state->cur.put_bits = 0;
+ if (state->simd) { /* and reset bit buffer to empty */
+ state->cur.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+ state->cur.free_bits = 0;
+#else
+ state->cur.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+ } else {
+ state->cur.put_buffer.c = 0;
+ state->cur.free_bits = BIT_BUF_SIZE;
+ }
STORE_BUFFER()
return TRUE;
@@ -493,7 +548,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
c_derived_tbl *dctbl, c_derived_tbl *actbl)
{
JOCTET _buffer[BUFSIZE], *buffer;
- size_t bytes, bytestocopy; int localbuf = 0;
+ int localbuf = 0;
LOAD_BUFFER()
@@ -509,53 +564,41 @@ LOCAL(boolean)
encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
c_derived_tbl *dctbl, c_derived_tbl *actbl)
{
- int temp, temp2, temp3;
- int nbits;
- int r, code, size;
+ int temp, nbits, free_bits;
+ bit_buf_type put_buffer;
JOCTET _buffer[BUFSIZE], *buffer;
- size_t put_buffer; int put_bits;
- int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
- size_t bytes, bytestocopy; int localbuf = 0;
+ int localbuf = 0;
- put_buffer = state->cur.put_buffer;
- put_bits = state->cur.put_bits;
+ free_bits = state->cur.free_bits;
+ put_buffer = state->cur.put_buffer.c;
LOAD_BUFFER()
/* Encode the DC coefficient difference per section F.1.2.1 */
- temp = temp2 = block[0] - last_dc_val;
+ temp = block[0] - last_dc_val;
/* This is a well-known technique for obtaining the absolute value without a
* branch. It is derived from an assembly language technique presented in
* "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
- * Agner Fog.
+ * Agner Fog. This code assumes we are on a two's complement machine.
*/
- temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
- temp ^= temp3;
- temp -= temp3;
-
- /* For a negative input, want temp2 = bitwise complement of abs(input) */
- /* This code assumes we are on a two's complement machine */
- temp2 += temp3;
+ nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
+ temp += nbits;
+ nbits ^= temp;
/* Find the number of bits needed for the magnitude of the coefficient */
- nbits = JPEG_NBITS(temp);
-
- /* Emit the Huffman-coded symbol for the number of bits */
- code = dctbl->ehufco[nbits];
- size = dctbl->ehufsi[nbits];
- EMIT_BITS(code, size)
+ nbits = JPEG_NBITS(nbits);
- /* Mask off any extra bits in code */
- temp2 &= (((JLONG)1) << nbits) - 1;
-
- /* Emit that number of bits of the value, if positive, */
- /* or the complement of its magnitude, if negative. */
- EMIT_BITS(temp2, nbits)
+ /* Emit the Huffman-coded symbol for the number of bits.
+ * Emit that number of bits of the value, if positive,
+ * or the complement of its magnitude, if negative.
+ */
+ PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
/* Encode the AC coefficients per section F.1.2.2 */
- r = 0; /* r = run length of zeros */
+ {
+ int r = 0; /* r = run length of zeros */
/* Manually unroll the k loop to eliminate the counter variable. This
* improves performance greatly on systems with a limited number of
@@ -563,51 +606,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
*/
#define kloop(jpeg_natural_order_of_k) { \
if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
- r++; \
+ r += 16; \
} else { \
- temp2 = temp; \
/* Branch-less absolute value, bitwise complement, etc., same as above */ \
- temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
- temp ^= temp3; \
- temp -= temp3; \
- temp2 += temp3; \
- nbits = JPEG_NBITS_NONZERO(temp); \
+ nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
+ temp += nbits; \
+ nbits ^= temp; \
+ nbits = JPEG_NBITS_NONZERO(nbits); \
/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
- while (r > 15) { \
- EMIT_BITS(code_0xf0, size_0xf0) \
- r -= 16; \
+ while (r >= 16 * 16) { \
+ r -= 16 * 16; \
+ PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
} \
/* Emit Huffman symbol for run length / number of bits */ \
- temp3 = (r << 4) + nbits; \
- code = actbl->ehufco[temp3]; \
- size = actbl->ehufsi[temp3]; \
- EMIT_CODE(code, size) \
+ r += nbits; \
+ PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
r = 0; \
} \
}
- /* One iteration for each value in jpeg_natural_order[] */
- kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
- kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
- kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
- kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
- kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
- kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
- kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
- kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
- kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
- kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
- kloop(55); kloop(62); kloop(63);
-
- /* If the last coef(s) were zero, emit an end-of-block code */
- if (r > 0) {
- code = actbl->ehufco[0];
- size = actbl->ehufsi[0];
- EMIT_BITS(code, size)
+ /* One iteration for each value in jpeg_natural_order[] */
+ kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
+ kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
+ kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
+ kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
+ kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
+ kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
+ kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
+ kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
+ kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
+ kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
+ kloop(55); kloop(62); kloop(63);
+
+ /* If the last coef(s) were zero, emit an end-of-block code */
+ if (r > 0) {
+ PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
+ }
}
- state->cur.put_buffer = put_buffer;
- state->cur.put_bits = put_bits;
+ state->cur.put_buffer.c = put_buffer;
+ state->cur.free_bits = free_bits;
STORE_BUFFER()
return TRUE;
@@ -654,8 +692,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
state.next_output_byte = cinfo->dest->next_output_byte;
state.free_in_buffer = cinfo->dest->free_in_buffer;
- ASSIGN_STATE(state.cur, entropy->saved);
+ state.cur = entropy->saved;
state.cinfo = cinfo;
+ state.simd = entropy->simd;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
@@ -694,7 +733,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
cinfo->dest->next_output_byte = state.next_output_byte;
cinfo->dest->free_in_buffer = state.free_in_buffer;
- ASSIGN_STATE(entropy->saved, state.cur);
+ entropy->saved = state.cur;
/* Update restart-interval state too */
if (cinfo->restart_interval) {
@@ -723,8 +762,9 @@ finish_pass_huff(j_compress_ptr cinfo)
/* Load up working state ... flush_bits needs it */
state.next_output_byte = cinfo->dest->next_output_byte;
state.free_in_buffer = cinfo->dest->free_in_buffer;
- ASSIGN_STATE(state.cur, entropy->saved);
+ state.cur = entropy->saved;
state.cinfo = cinfo;
+ state.simd = entropy->simd;
/* Flush out the last data */
if (!flush_bits(&state))
@@ -733,7 +773,7 @@ finish_pass_huff(j_compress_ptr cinfo)
/* Update state */
cinfo->dest->next_output_byte = state.next_output_byte;
cinfo->dest->free_in_buffer = state.free_in_buffer;
- ASSIGN_STATE(entropy->saved, state.cur);
+ entropy->saved = state.cur;
}
diff --git a/3rdparty/libjpeg-turbo/src/jcphuff.c b/3rdparty/libjpeg-turbo/src/jcphuff.c
index a8b94bed84b8..bd14fc27d5e2 100644
--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@@ -4,8 +4,9 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
* Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -51,15 +52,19 @@
* flags (this defines __thumb__).
*/
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
+ defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
#ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
+#else
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
+#endif
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
@@ -169,24 +174,26 @@ INLINE
METHODDEF(int)
count_zeroes(size_t *x)
{
- int result;
#if defined(HAVE_BUILTIN_CTZL)
+ int result;
result = __builtin_ctzl(*x);
*x >>= result;
#elif defined(HAVE_BITSCANFORWARD64)
+ unsigned long result;
_BitScanForward64(&result, *x);
*x >>= result;
#elif defined(HAVE_BITSCANFORWARD)
+ unsigned long result;
_BitScanForward(&result, *x);
*x >>= result;
#else
- result = 0;
+ int result = 0;
while ((*x & 1) == 0) {
++result;
*x >>= 1;
}
#endif
- return result;
+ return (int)result;
}
@@ -860,7 +867,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block,
#define ENCODE_COEFS_AC_REFINE(label) { \
while (zerobits) { \
- int idx = count_zeroes(&zerobits); \
+ idx = count_zeroes(&zerobits); \
r += idx; \
cabsvalue += idx; \
signbits >>= idx; \
@@ -917,7 +924,7 @@ METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
- register int temp, r;
+ register int temp, r, idx;
char *BR_buffer;
unsigned int BR;
int Sl = cinfo->Se - cinfo->Ss + 1;
@@ -968,7 +975,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
if (zerobits) {
int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
- int idx = count_zeroes(&zerobits);
+ idx = count_zeroes(&zerobits);
signbits >>= idx;
idx += diff;
r += idx;
diff --git a/3rdparty/libjpeg-turbo/src/jcsample.c b/3rdparty/libjpeg-turbo/src/jcsample.c
index bd27b84e068a..e8515ebf0fce 100644
--- a/3rdparty/libjpeg-turbo/src/jcsample.c
+++ b/3rdparty/libjpeg-turbo/src/jcsample.c
@@ -6,7 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman for Cendio AB
* Copyright (C) 2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
if (numcols > 0) {
for (row = 0; row < num_rows; row++) {
ptr = image_data[row] + input_cols;
- pixval = ptr[-1]; /* don't need GETJSAMPLE() here */
+ pixval = ptr[-1];
for (count = numcols; count > 0; count--)
*ptr++ = pixval;
}
@@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
for (v = 0; v < v_expand; v++) {
inptr = input_data[inrow + v] + outcol_h;
for (h = 0; h < h_expand; h++) {
- outvalue += (JLONG)GETJSAMPLE(*inptr++);
+ outvalue += (JLONG)(*inptr++);
}
}
*outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
@@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
inptr = input_data[outrow];
bias = 0; /* bias = 0,1,0,1,... for successive samples */
for (outcol = 0; outcol < output_cols; outcol++) {
- *outptr++ =
- (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1);
+ *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
bias ^= 1; /* 0=>1, 1=>0 */
inptr += 2;
}
@@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
bias = 1; /* bias = 1,2,1,2,... for successive samples */
for (outcol = 0; outcol < output_cols; outcol++) {
*outptr++ =
- (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
- GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2);
+ (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
bias ^= 3; /* 1=>2, 2=>1 */
inptr0 += 2; inptr1 += 2;
}
@@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
below_ptr = input_data[inrow + 2];
/* Special case for first column: pretend column -1 is same as column 0 */
- membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
- GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
- neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
- GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
- GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
- GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+ inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
neighsum += neighsum;
- neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
- GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+ neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
for (colctr = output_cols - 2; colctr > 0; colctr--) {
/* sum of pixels directly mapped to this output element */
- membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
- GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
/* sum of edge-neighbor pixels */
- neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
- GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
- GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
- GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+ inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
/* The edge-neighbors count twice as much as corner-neighbors */
neighsum += neighsum;
/* Add in the corner-neighbors */
- neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
- GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+ neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
/* form final output scaled up by 2^16 */
membersum = membersum * memberscale + neighsum * neighscale;
/* round, descale and output it */
@@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
}
/* Special case for last column */
- membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
- GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
- neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
- GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
- GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
- GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+ inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
neighsum += neighsum;
- neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
- GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+ neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
membersum = membersum * memberscale + neighsum * neighscale;
*outptr = (JSAMPLE)((membersum + 32768) >> 16);
@@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
below_ptr = input_data[outrow + 1];
/* Special case for first column */
- colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
- GETJSAMPLE(*inptr);
- membersum = GETJSAMPLE(*inptr++);
- nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
- GETJSAMPLE(*inptr);
+ colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
+ membersum = *inptr++;
+ nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
neighsum = colsum + (colsum - membersum) + nextcolsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
lastcolsum = colsum; colsum = nextcolsum;
for (colctr = output_cols - 2; colctr > 0; colctr--) {
- membersum = GETJSAMPLE(*inptr++);
+ membersum = *inptr++;
above_ptr++; below_ptr++;
- nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
- GETJSAMPLE(*inptr);
+ nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
@@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
}
/* Special case for last column */
- membersum = GETJSAMPLE(*inptr);
+ membersum = *inptr;
neighsum = lastcolsum + (colsum - membersum) + colsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr = (JSAMPLE)((membersum + 32768) >> 16);
diff --git a/3rdparty/libjpeg-turbo/src/jdapistd.c b/3rdparty/libjpeg-turbo/src/jdapistd.c
index 38bd1110d9b3..695a6200992d 100644
--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
{
JDIMENSION n;
my_master_ptr master = (my_master_ptr)cinfo->master;
+ JSAMPLE dummy_sample[1] = { 0 };
+ JSAMPROW dummy_row = dummy_sample;
JSAMPARRAY scanlines = NULL;
void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION input_row, JSAMPARRAY output_buf,
@@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
if (cinfo->cconvert && cinfo->cconvert->color_convert) {
color_convert = cinfo->cconvert->color_convert;
cinfo->cconvert->color_convert = noop_convert;
+ /* This just prevents UBSan from complaining about adding 0 to a NULL
+ * pointer. The pointer isn't actually used.
+ */
+ scanlines = &dummy_row;
}
if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
@@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
* decoded coefficients. This is ~5% faster for large subsets, but
* it's tough to tell a difference for smaller images.
*/
+ if (!cinfo->entropy->insufficient_data)
+ cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
(*cinfo->entropy->decode_mcu) (cinfo, NULL);
}
}
diff --git a/3rdparty/libjpeg-turbo/src/jdarith.c b/3rdparty/libjpeg-turbo/src/jdarith.c
index 6002481e242c..7f0d3a785c39 100644
--- a/3rdparty/libjpeg-turbo/src/jdarith.c
+++ b/3rdparty/libjpeg-turbo/src/jdarith.c
@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Developed 1997-2015 by Guido Vollbeding.
* libjpeg-turbo Modifications:
- * Copyright (C) 2015-2018, D. R. Commander.
+ * Copyright (C) 2015-2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo)
if (!(*src->fill_input_buffer) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
src->bytes_in_buffer--;
- return GETJOCTET(*src->next_input_byte++);
+ return *src->next_input_byte++;
}
@@ -665,8 +665,16 @@ start_pass(j_decompress_ptr cinfo)
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
int *coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+ int *prev_coef_bit_ptr =
+ &cinfo->coef_bits[cindex + cinfo->num_components][0];
if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+ for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+ if (cinfo->input_scan_number > 1)
+ prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+ else
+ prev_coef_bit_ptr[coefi] = 0;
+ }
for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
if (cinfo->Ah != expected)
@@ -727,6 +735,7 @@ start_pass(j_decompress_ptr cinfo)
entropy->c = 0;
entropy->a = 0;
entropy->ct = -16; /* force reading 2 initial bytes to fill C */
+ entropy->pub.insufficient_data = FALSE;
/* Initialize restart counter */
entropy->restarts_to_go = cinfo->restart_interval;
@@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo)
int *coef_bit_ptr, ci;
cinfo->coef_bits = (int (*)[DCTSIZE2])
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
- cinfo->num_components * DCTSIZE2 *
+ cinfo->num_components * 2 * DCTSIZE2 *
sizeof(int));
coef_bit_ptr = &cinfo->coef_bits[0][0];
for (ci = 0; ci < cinfo->num_components; ci++)
diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.c b/3rdparty/libjpeg-turbo/src/jdcoefct.c
index 2ba6aa11e4d2..15e6cded628e 100644
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c
@@ -5,7 +5,7 @@
* Copyright (C) 1994-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman for Cendio AB
- * Copyright (C) 2010, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
* Copyright (C) 2015, 2020, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
/* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */
jzero_far((void *)coef->MCU_buffer[0],
(size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK)));
+ if (!cinfo->entropy->insufficient_data)
+ cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
/* Suspension forced; update state counters and exit */
coef->MCU_vert_offset = yoffset;
@@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo)
}
}
}
+ if (!cinfo->entropy->insufficient_data)
+ cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
/* Try to fetch the MCU. */
if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
/* Suspension forced; update state counters and exit */
@@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
#ifdef BLOCK_SMOOTHING_SUPPORTED
/*
- * This code applies interblock smoothing as described by section K.8
- * of the JPEG standard: the first 5 AC coefficients are estimated from
- * the DC values of a DCT block and its 8 neighboring blocks.
+ * This code applies interblock smoothing; the first 9 AC coefficients are
+ * estimated from the DC values of a DCT block and its 24 neighboring blocks.
* We apply smoothing only for progressive JPEG decoding, and only if
* the coefficients it can estimate are not yet known to full precision.
*/
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
+/* Natural-order array positions of the first 9 zigzag-order coefficients */
#define Q01_POS 1
#define Q10_POS 8
#define Q20_POS 16
#define Q11_POS 9
#define Q02_POS 2
+#define Q03_POS 3
+#define Q12_POS 10
+#define Q21_POS 17
+#define Q30_POS 24
/*
* Determine whether block smoothing is applicable and safe.
@@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo)
int ci, coefi;
jpeg_component_info *compptr;
JQUANT_TBL *qtable;
- int *coef_bits;
- int *coef_bits_latch;
+ int *coef_bits, *prev_coef_bits;
+ int *coef_bits_latch, *prev_coef_bits_latch;
if (!cinfo->progressive_mode || cinfo->coef_bits == NULL)
return FALSE;
@@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo)
if (coef->coef_bits_latch == NULL)
coef->coef_bits_latch = (int *)
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
- cinfo->num_components *
+ cinfo->num_components * 2 *
(SAVED_COEFS * sizeof(int)));
coef_bits_latch = coef->coef_bits_latch;
+ prev_coef_bits_latch =
+ &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS];
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* All components' quantization values must already be latched. */
if ((qtable = compptr->quant_table) == NULL)
return FALSE;
- /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+ /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */
if (qtable->quantval[0] == 0 ||
qtable->quantval[Q01_POS] == 0 ||
qtable->quantval[Q10_POS] == 0 ||
qtable->quantval[Q20_POS] == 0 ||
qtable->quantval[Q11_POS] == 0 ||
- qtable->quantval[Q02_POS] == 0)
+ qtable->quantval[Q02_POS] == 0 ||
+ qtable->quantval[Q03_POS] == 0 ||
+ qtable->quantval[Q12_POS] == 0 ||
+ qtable->quantval[Q21_POS] == 0 ||
+ qtable->quantval[Q30_POS] == 0)
return FALSE;
/* DC values must be at least partly known for all components. */
coef_bits = cinfo->coef_bits[ci];
+ prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
if (coef_bits[0] < 0)
return FALSE;
+ coef_bits_latch[0] = coef_bits[0];
/* Block smoothing is helpful if some AC coefficients remain inaccurate. */
- for (coefi = 1; coefi <= 5; coefi++) {
+ for (coefi = 1; coefi < SAVED_COEFS; coefi++) {
+ if (cinfo->input_scan_number > 1)
+ prev_coef_bits_latch[coefi] = prev_coef_bits[coefi];
+ else
+ prev_coef_bits_latch[coefi] = -1;
coef_bits_latch[coefi] = coef_bits[coefi];
if (coef_bits[coefi] != 0)
smoothing_useful = TRUE;
}
coef_bits_latch += SAVED_COEFS;
+ prev_coef_bits_latch += SAVED_COEFS;
}
return smoothing_useful;
@@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
JDIMENSION block_num, last_block_column;
int ci, block_row, block_rows, access_rows;
JBLOCKARRAY buffer;
- JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+ JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
+ JBLOCKROW next_block_row, next_next_block_row;
JSAMPARRAY output_ptr;
JDIMENSION output_col;
jpeg_component_info *compptr;
inverse_DCT_method_ptr inverse_DCT;
- boolean first_row, last_row;
+ boolean change_dc;
JCOEF *workspace;
int *coef_bits;
JQUANT_TBL *quanttbl;
- JLONG Q00, Q01, Q02, Q10, Q11, Q20, num;
- int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9;
+ JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
+ int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
+ DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
+ DC25;
int Al, pred;
/* Keep a local variable to avoid looking it up more than once */
@@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
if (cinfo->input_scan_number == cinfo->output_scan_number) {
/* If input is working on current scan, we ordinarily want it to
* have completed the current row. But if input scan is DC,
- * we want it to keep one row ahead so that next block row's DC
+ * we want it to keep two rows ahead so that next two block rows' DC
* values are up to date.
*/
- JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+ JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0;
if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta)
break;
}
@@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
if (!compptr->component_needed)
continue;
/* Count non-dummy DCT block rows in this iMCU row. */
- if (cinfo->output_iMCU_row < last_iMCU_row) {
+ if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
+ block_rows = compptr->v_samp_factor;
+ access_rows = block_rows * 3; /* this and next two iMCU rows */
+ } else if (cinfo->output_iMCU_row < last_iMCU_row) {
block_rows = compptr->v_samp_factor;
access_rows = block_rows * 2; /* this and next iMCU row */
- last_row = FALSE;
} else {
/* NB: can't use last_row_height here; it is input-side-dependent! */
block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
if (block_rows == 0) block_rows = compptr->v_samp_factor;
access_rows = block_rows; /* this iMCU row only */
- last_row = TRUE;
}
/* Align the virtual buffer for this component. */
- if (cinfo->output_iMCU_row > 0) {
- access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+ if (cinfo->output_iMCU_row > 1) {
+ access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
+ buffer = (*cinfo->mem->access_virt_barray)
+ ((j_common_ptr)cinfo, coef->whole_image[ci],
+ (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
+ (JDIMENSION)access_rows, FALSE);
+ buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
+ } else if (cinfo->output_iMCU_row > 0) {
buffer = (*cinfo->mem->access_virt_barray)
((j_common_ptr)cinfo, coef->whole_image[ci],
(cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
(JDIMENSION)access_rows, FALSE);
buffer += compptr->v_samp_factor; /* point to current iMCU row */
- first_row = FALSE;
} else {
buffer = (*cinfo->mem->access_virt_barray)
((j_common_ptr)cinfo, coef->whole_image[ci],
(JDIMENSION)0, (JDIMENSION)access_rows, FALSE);
- first_row = TRUE;
}
- /* Fetch component-dependent info */
- coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+ /* Fetch component-dependent info.
+ * If the current scan is incomplete, then we use the component-dependent
+ * info from the previous scan.
+ */
+ if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row)
+ coef_bits =
+ coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS);
+ else
+ coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+
+ /* We only do DC interpolation if no AC coefficient data is available. */
+ change_dc =
+ coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 &&
+ coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 &&
+ coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1;
+
quanttbl = compptr->quant_table;
Q00 = quanttbl->quantval[0];
Q01 = quanttbl->quantval[Q01_POS];
@@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
Q20 = quanttbl->quantval[Q20_POS];
Q11 = quanttbl->quantval[Q11_POS];
Q02 = quanttbl->quantval[Q02_POS];
+ if (change_dc) {
+ Q03 = quanttbl->quantval[Q03_POS];
+ Q12 = quanttbl->quantval[Q12_POS];
+ Q21 = quanttbl->quantval[Q21_POS];
+ Q30 = quanttbl->quantval[Q30_POS];
+ }
inverse_DCT = cinfo->idct->inverse_DCT[ci];
output_ptr = output_buf[ci];
/* Loop over all DCT blocks to be processed. */
for (block_row = 0; block_row < block_rows; block_row++) {
buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
- if (first_row && block_row == 0)
+
+ if (block_row > 0 || cinfo->output_iMCU_row > 0)
+ prev_block_row =
+ buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
+ else
prev_block_row = buffer_ptr;
+
+ if (block_row > 1 || cinfo->output_iMCU_row > 1)
+ prev_prev_block_row =
+ buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
+ else
+ prev_prev_block_row = prev_block_row;
+
+ if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
+ next_block_row =
+ buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
else
- prev_block_row = buffer[block_row - 1] +
- cinfo->master->first_MCU_col[ci];
- if (last_row && block_row == block_rows - 1)
next_block_row = buffer_ptr;
+
+ if (block_row < block_rows - 2 ||
+ cinfo->output_iMCU_row < last_iMCU_row - 1)
+ next_next_block_row =
+ buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
else
- next_block_row = buffer[block_row + 1] +
- cinfo->master->first_MCU_col[ci];
+ next_next_block_row = next_block_row;
+
/* We fetch the surrounding DC values using a sliding-register approach.
- * Initialize all nine here so as to do the right thing on narrow pics.
+ * Initialize all 25 here so as to do the right thing on narrow pics.
*/
- DC1 = DC2 = DC3 = (int)prev_block_row[0][0];
- DC4 = DC5 = DC6 = (int)buffer_ptr[0][0];
- DC7 = DC8 = DC9 = (int)next_block_row[0][0];
+ DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0];
+ DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0];
+ DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0];
+ DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0];
+ DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0];
output_col = 0;
last_block_column = compptr->width_in_blocks - 1;
for (block_num = cinfo->master->first_MCU_col[ci];
@@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
/* Fetch current DCT block into workspace so we can modify it. */
jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
/* Update DC values */
- if (block_num < last_block_column) {
- DC3 = (int)prev_block_row[1][0];
- DC6 = (int)buffer_ptr[1][0];
- DC9 = (int)next_block_row[1][0];
+ if (block_num == cinfo->master->first_MCU_col[ci] &&
+ block_num < last_block_column) {
+ DC04 = (int)prev_prev_block_row[1][0];
+ DC09 = (int)prev_block_row[1][0];
+ DC14 = (int)buffer_ptr[1][0];
+ DC19 = (int)next_block_row[1][0];
+ DC24 = (int)next_next_block_row[1][0];
}
- /* Compute coefficient estimates per K.8.
- * An estimate is applied only if coefficient is still zero,
- * and is not known to be fully accurate.
+ if (block_num + 1 < last_block_column) {
+ DC05 = (int)prev_prev_block_row[2][0];
+ DC10 = (int)prev_block_row[2][0];
+ DC15 = (int)buffer_ptr[2][0];
+ DC20 = (int)next_block_row[2][0];
+ DC25 = (int)next_next_block_row[2][0];
+ }
+ /* If DC interpolation is enabled, compute coefficient estimates using
+ * a Gaussian-like kernel, keeping the averages of the DC values.
+ *
+ * If DC interpolation is disabled, compute coefficient estimates using
+ * an algorithm similar to the one described in Section K.8 of the JPEG
+ * standard, except applied to a 5x5 window rather than a 3x3 window.
+ *
+ * An estimate is applied only if the coefficient is still zero and is
+ * not known to be fully accurate.
*/
/* AC01 */
if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) {
- num = 36 * Q00 * (DC4 - DC6);
+ num = Q00 * (change_dc ?
+ (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 -
+ 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 +
+ 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 -
+ DC21 - DC22 + DC24 + DC25) :
+ (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15));
if (num >= 0) {
pred = (int)(((Q01 << 7) + num) / (Q01 << 8));
if (Al > 0 && pred >= (1 << Al))
@@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC10 */
if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) {
- num = 36 * Q00 * (DC2 - DC8);
+ num = Q00 * (change_dc ?
+ (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 +
+ 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 -
+ 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 +
+ 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) :
+ (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23));
if (num >= 0) {
pred = (int)(((Q10 << 7) + num) / (Q10 << 8));
if (Al > 0 && pred >= (1 << Al))
@@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC20 */
if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) {
- num = 9 * Q00 * (DC2 + DC8 - 2 * DC5);
+ num = Q00 * (change_dc ?
+ (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 -
+ 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) :
+ (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23));
if (num >= 0) {
pred = (int)(((Q20 << 7) + num) / (Q20 << 8));
if (Al > 0 && pred >= (1 << Al))
@@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC11 */
if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) {
- num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+ num = Q00 * (change_dc ?
+ (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 +
+ 9 * DC19 + DC21 - DC25) :
+ (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 -
+ DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09));
if (num >= 0) {
pred = (int)(((Q11 << 7) + num) / (Q11 << 8));
if (Al > 0 && pred >= (1 << Al))
@@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC02 */
if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) {
- num = 9 * Q00 * (DC4 + DC6 - 2 * DC5);
+ num = Q00 * (change_dc ?
+ (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 +
+ 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) :
+ (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15));
if (num >= 0) {
pred = (int)(((Q02 << 7) + num) / (Q02 << 8));
if (Al > 0 && pred >= (1 << Al))
@@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
workspace[2] = (JCOEF)pred;
}
+ if (change_dc) {
+ /* AC03 */
+ if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) {
+ num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19);
+ if (num >= 0) {
+ pred = (int)(((Q03 << 7) + num) / (Q03 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ } else {
+ pred = (int)(((Q03 << 7) - num) / (Q03 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ pred = -pred;
+ }
+ workspace[3] = (JCOEF)pred;
+ }
+ /* AC12 */
+ if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) {
+ num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19);
+ if (num >= 0) {
+ pred = (int)(((Q12 << 7) + num) / (Q12 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ } else {
+ pred = (int)(((Q12 << 7) - num) / (Q12 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ pred = -pred;
+ }
+ workspace[10] = (JCOEF)pred;
+ }
+ /* AC21 */
+ if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) {
+ num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19);
+ if (num >= 0) {
+ pred = (int)(((Q21 << 7) + num) / (Q21 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ } else {
+ pred = (int)(((Q21 << 7) - num) / (Q21 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ pred = -pred;
+ }
+ workspace[17] = (JCOEF)pred;
+ }
+ /* AC30 */
+ if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) {
+ num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19);
+ if (num >= 0) {
+ pred = (int)(((Q30 << 7) + num) / (Q30 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ } else {
+ pred = (int)(((Q30 << 7) - num) / (Q30 << 8));
+ if (Al > 0 && pred >= (1 << Al))
+ pred = (1 << Al) - 1;
+ pred = -pred;
+ }
+ workspace[24] = (JCOEF)pred;
+ }
+ /* coef_bits[0] is non-negative. Otherwise this function would not
+ * be called.
+ */
+ num = Q00 *
+ (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 -
+ 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 -
+ 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 -
+ 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 -
+ 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25);
+ if (num >= 0) {
+ pred = (int)(((Q00 << 7) + num) / (Q00 << 8));
+ } else {
+ pred = (int)(((Q00 << 7) - num) / (Q00 << 8));
+ pred = -pred;
+ }
+ workspace[0] = (JCOEF)pred;
+ } /* change_dc */
+
/* OK, do the IDCT */
(*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr,
output_col);
/* Advance for next column */
- DC1 = DC2; DC2 = DC3;
- DC4 = DC5; DC5 = DC6;
- DC7 = DC8; DC8 = DC9;
- buffer_ptr++, prev_block_row++, next_block_row++;
+ DC01 = DC02; DC02 = DC03; DC03 = DC04; DC04 = DC05;
+ DC06 = DC07; DC07 = DC08; DC08 = DC09; DC09 = DC10;
+ DC11 = DC12; DC12 = DC13; DC13 = DC14; DC14 = DC15;
+ DC16 = DC17; DC17 = DC18; DC18 = DC19; DC19 = DC20;
+ DC21 = DC22; DC22 = DC23; DC23 = DC24; DC24 = DC25;
+ buffer_ptr++, prev_block_row++, next_block_row++,
+ prev_prev_block_row++, next_next_block_row++;
output_col += compptr->_DCT_scaled_size;
}
output_ptr += compptr->_DCT_scaled_size;
@@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
#ifdef BLOCK_SMOOTHING_SUPPORTED
/* If block smoothing could be used, need a bigger window */
if (cinfo->progressive_mode)
- access_rows *= 3;
+ access_rows *= 5;
#endif
coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,
diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.h b/3rdparty/libjpeg-turbo/src/jdcoefct.h
index c4d1943dd4db..9a0e78066364 100644
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.h
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.h
@@ -5,6 +5,7 @@
* Copyright (C) 1994-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman for Cendio AB
+ * Copyright (C) 2020, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*/
@@ -51,7 +52,7 @@ typedef struct {
#ifdef BLOCK_SMOOTHING_SUPPORTED
/* When doing block smoothing, we latch coefficient Al values here */
int *coef_bits_latch;
-#define SAVED_COEFS 6 /* we save coef_bits[0..5] */
+#define SAVED_COEFS 10 /* we save coef_bits[0..9] */
#endif
} my_coef_controller;
diff --git a/3rdparty/libjpeg-turbo/src/jdcol565.c b/3rdparty/libjpeg-turbo/src/jdcol565.c
index 40068ef84fd2..53c7bd9187d4 100644
--- a/3rdparty/libjpeg-turbo/src/jdcol565.c
+++ b/3rdparty/libjpeg-turbo/src/jdcol565.c
@@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
b = range_limit[y + Cbbtab[cb]];
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@@ -80,9 +80,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
- y = GETJSAMPLE(*inptr0);
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ y = *inptr0;
+ cb = *inptr1;
+ cr = *inptr2;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr0++);
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ y = *inptr0++;
+ cb = *inptr1++;
+ cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
- y = GETJSAMPLE(*inptr0);
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ y = *inptr0;
+ cb = *inptr1;
+ cr = *inptr2;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
- r = GETJSAMPLE(*inptr0++);
- g = GETJSAMPLE(*inptr1++);
- b = GETJSAMPLE(*inptr2++);
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
outptr += 2;
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
- r = GETJSAMPLE(*inptr0++);
- g = GETJSAMPLE(*inptr1++);
- b = GETJSAMPLE(*inptr2++);
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
rgb = PACK_SHORT_565(r, g, b);
- r = GETJSAMPLE(*inptr0++);
- g = GETJSAMPLE(*inptr1++);
- b = GETJSAMPLE(*inptr2++);
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
outptr += 4;
}
if (num_cols & 1) {
- r = GETJSAMPLE(*inptr0);
- g = GETJSAMPLE(*inptr1);
- b = GETJSAMPLE(*inptr2);
+ r = *inptr0;
+ g = *inptr1;
+ b = *inptr2;
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
}
@@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
- r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
- g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
- b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+ r = range_limit[DITHER_565_R(*inptr0++, d0)];
+ g = range_limit[DITHER_565_G(*inptr1++, d0)];
+ b = range_limit[DITHER_565_B(*inptr2++, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
outptr += 2;
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
- r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
- g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
- b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+ r = range_limit[DITHER_565_R(*inptr0++, d0)];
+ g = range_limit[DITHER_565_G(*inptr1++, d0)];
+ b = range_limit[DITHER_565_B(*inptr2++, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
- r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
- g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
- b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+ r = range_limit[DITHER_565_R(*inptr0++, d0)];
+ g = range_limit[DITHER_565_G(*inptr1++, d0)];
+ b = range_limit[DITHER_565_B(*inptr2++, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
@@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
- r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
- g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
- b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
+ r = range_limit[DITHER_565_R(*inptr0, d0)];
+ g = range_limit[DITHER_565_G(*inptr1, d0)];
+ b = range_limit[DITHER_565_B(*inptr2, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
}
diff --git a/3rdparty/libjpeg-turbo/src/jdcolext.c b/3rdparty/libjpeg-turbo/src/jdcolext.c
index 72a530107036..863c7a2fbc76 100644
--- a/3rdparty/libjpeg-turbo/src/jdcolext.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolext.c
@@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
- y = GETJSAMPLE(inptr0[col]);
- cb = GETJSAMPLE(inptr1[col]);
- cr = GETJSAMPLE(inptr2[col]);
+ y = inptr0[col];
+ cb = inptr1[col];
+ cr = inptr2[col];
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
outptr[RGB_GREEN] = range_limit[y +
@@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
inptr = input_buf[0][input_row++];
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
- /* We can dispense with GETJSAMPLE() here */
outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
/* Set unused byte to 0xFF so it can be interpreted as an opaque */
/* alpha channel value */
@@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
- /* We can dispense with GETJSAMPLE() here */
outptr[RGB_RED] = inptr0[col];
outptr[RGB_GREEN] = inptr1[col];
outptr[RGB_BLUE] = inptr2[col];
diff --git a/3rdparty/libjpeg-turbo/src/jdcolor.c b/3rdparty/libjpeg-turbo/src/jdcolor.c
index d3ae40c7da9a..8da2b4eaf2e9 100644
--- a/3rdparty/libjpeg-turbo/src/jdcolor.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolor.c
@@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
- r = GETJSAMPLE(inptr0[col]);
- g = GETJSAMPLE(inptr1[col]);
- b = GETJSAMPLE(inptr2[col]);
+ r = inptr0[col];
+ g = inptr1[col];
+ b = inptr2[col];
/* Y */
outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
ctab[b + B_Y_OFF]) >> SCALEBITS);
@@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
- y = GETJSAMPLE(inptr0[col]);
- cb = GETJSAMPLE(inptr1[col]);
- cr = GETJSAMPLE(inptr2[col]);
+ y = inptr0[col];
+ cb = inptr1[col];
+ cr = inptr2[col];
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
@@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
SCALEBITS)))];
outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
/* K passes through unchanged */
- outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
+ outptr[3] = inptr3[col];
outptr += 4;
}
}
diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.c b/3rdparty/libjpeg-turbo/src/jdhuff.c
index a1128178b0a9..f786c1054735 100644
--- a/3rdparty/libjpeg-turbo/src/jdhuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.c
@@ -5,6 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -39,24 +40,6 @@ typedef struct {
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
-/* This macro is to work around compilers with missing or broken
- * structure assignment. You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src) ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
- ((dest).last_dc_val[0] = (src).last_dc_val[0], \
- (dest).last_dc_val[1] = (src).last_dc_val[1], \
- (dest).last_dc_val[2] = (src).last_dc_val[2], \
- (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
typedef struct {
struct jpeg_entropy_decoder pub; /* public fields */
@@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
bytes_in_buffer = cinfo->src->bytes_in_buffer;
}
bytes_in_buffer--;
- c = GETJOCTET(*next_input_byte++);
+ c = *next_input_byte++;
/* If it's 0xFF, check and discard stuffed zero byte */
if (c == 0xFF) {
@@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
bytes_in_buffer = cinfo->src->bytes_in_buffer;
}
bytes_in_buffer--;
- c = GETJOCTET(*next_input_byte++);
+ c = *next_input_byte++;
} while (c == 0xFF);
if (c == 0) {
@@ -405,8 +388,8 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
#define GET_BYTE { \
register int c0, c1; \
- c0 = GETJOCTET(*buffer++); \
- c1 = GETJOCTET(*buffer); \
+ c0 = *buffer++; \
+ c1 = *buffer; \
/* Pre-execute most common case */ \
get_buffer = (get_buffer << 8) | c0; \
bits_left += 8; \
@@ -423,7 +406,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
} \
}
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
/* Pre-fetch 48 bytes, because the holding register is 64-bit */
#define FILL_BIT_BUFFER_FAST \
@@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo)
}
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("signed-integer-overflow"),
+ no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
LOCAL(boolean)
decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
@@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
- ASSIGN_STATE(state, entropy->saved);
+ state = entropy->saved;
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
/* Convert DC difference to actual value, update last_dc_val */
int ci = cinfo->MCU_membership[blkn];
- /* This is really just
- * s += state.last_dc_val[ci];
- * It is written this way in order to shut up UBSan.
+ /* Certain malformed JPEG images produce repeated DC coefficient
+ * differences of 2047 or -2047, which causes state.last_dc_val[ci] to
+ * grow until it overflows or underflows a 32-bit signed integer. This
+ * behavior is, to the best of our understanding, innocuous, and it is
+ * unclear how to work around it without potentially affecting
+ * performance. Thus, we (hopefully temporarily) suppress UBSan integer
+ * overflow errors for this function.
*/
- s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+ s += state.last_dc_val[ci];
state.last_dc_val[ci] = s;
if (block) {
/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
- ASSIGN_STATE(entropy->saved, state);
+ entropy->saved = state;
return TRUE;
}
@@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
buffer = (JOCTET *)br_state.next_input_byte;
- ASSIGN_STATE(state, entropy->saved);
+ state = entropy->saved;
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
int ci = cinfo->MCU_membership[blkn];
- s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+ s += state.last_dc_val[ci];
state.last_dc_val[ci] = s;
if (block)
(*block)[0] = (JCOEF)s;
@@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
br_state.next_input_byte = buffer;
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
- ASSIGN_STATE(entropy->saved, state);
+ entropy->saved = state;
return TRUE;
}
@@ -795,7 +788,8 @@ decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
}
/* Account for restart interval (no-op if not using restarts) */
- entropy->restarts_to_go--;
+ if (cinfo->restart_interval)
+ entropy->restarts_to_go--;
return TRUE;
}
diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.h b/3rdparty/libjpeg-turbo/src/jdhuff.h
index 6a8d90f4027c..cfa0b7f55888 100644
--- a/3rdparty/libjpeg-turbo/src/jdhuff.h
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.h
@@ -4,7 +4,8 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
typedef size_t bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
+#elif defined(__x86_64__) && defined(__ILP32__)
+
+typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE 64 /* size of buffer in bits */
+
#else
typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */
@@ -228,7 +234,10 @@ slowlabel: \
s |= GET_BITS(1); \
nb++; \
} \
- s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
+ if (nb > 16) \
+ s = 0; \
+ else \
+ s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
}
/* Out-of-line case for Huffman code fetching */
diff --git a/3rdparty/libjpeg-turbo/src/jdicc.c b/3rdparty/libjpeg-turbo/src/jdicc.c
index 7224695816b0..a1a5b867ae2b 100644
--- a/3rdparty/libjpeg-turbo/src/jdicc.c
+++ b/3rdparty/libjpeg-turbo/src/jdicc.c
@@ -38,18 +38,18 @@ marker_is_icc(jpeg_saved_marker_ptr marker)
marker->marker == ICC_MARKER &&
marker->data_length >= ICC_OVERHEAD_LEN &&
/* verify the identifying string */
- GETJOCTET(marker->data[0]) == 0x49 &&
- GETJOCTET(marker->data[1]) == 0x43 &&
- GETJOCTET(marker->data[2]) == 0x43 &&
- GETJOCTET(marker->data[3]) == 0x5F &&
- GETJOCTET(marker->data[4]) == 0x50 &&
- GETJOCTET(marker->data[5]) == 0x52 &&
- GETJOCTET(marker->data[6]) == 0x4F &&
- GETJOCTET(marker->data[7]) == 0x46 &&
- GETJOCTET(marker->data[8]) == 0x49 &&
- GETJOCTET(marker->data[9]) == 0x4C &&
- GETJOCTET(marker->data[10]) == 0x45 &&
- GETJOCTET(marker->data[11]) == 0x0;
+ marker->data[0] == 0x49 &&
+ marker->data[1] == 0x43 &&
+ marker->data[2] == 0x43 &&
+ marker->data[3] == 0x5F &&
+ marker->data[4] == 0x50 &&
+ marker->data[5] == 0x52 &&
+ marker->data[6] == 0x4F &&
+ marker->data[7] == 0x46 &&
+ marker->data[8] == 0x49 &&
+ marker->data[9] == 0x4C &&
+ marker->data[10] == 0x45 &&
+ marker->data[11] == 0x0;
}
@@ -102,12 +102,12 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
if (marker_is_icc(marker)) {
if (num_markers == 0)
- num_markers = GETJOCTET(marker->data[13]);
- else if (num_markers != GETJOCTET(marker->data[13])) {
+ num_markers = marker->data[13];
+ else if (num_markers != marker->data[13]) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* inconsistent num_markers fields */
return FALSE;
}
- seq_no = GETJOCTET(marker->data[12]);
+ seq_no = marker->data[12];
if (seq_no <= 0 || seq_no > num_markers) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* bogus sequence number */
return FALSE;
@@ -154,7 +154,7 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
JOCTET FAR *src_ptr;
JOCTET *dst_ptr;
unsigned int length;
- seq_no = GETJOCTET(marker->data[12]);
+ seq_no = marker->data[12];
dst_ptr = icc_data + data_offset[seq_no];
src_ptr = marker->data + ICC_OVERHEAD_LEN;
length = data_length[seq_no];
diff --git a/3rdparty/libjpeg-turbo/src/jdmarker.c b/3rdparty/libjpeg-turbo/src/jdmarker.c
index c9c7ef639947..b964c3a1a6ac 100644
--- a/3rdparty/libjpeg-turbo/src/jdmarker.c
+++ b/3rdparty/libjpeg-turbo/src/jdmarker.c
@@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr;
#define INPUT_BYTE(cinfo, V, action) \
MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
- V = GETJOCTET(*next_input_byte++); )
+ V = *next_input_byte++; )
/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
* V should be declared unsigned int or perhaps JLONG.
@@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr;
#define INPUT_2BYTES(cinfo, V, action) \
MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
- V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \
+ V = ((unsigned int)(*next_input_byte++)) << 8; \
MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
- V += GETJOCTET(*next_input_byte++); )
+ V += *next_input_byte++; )
/*
@@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
JLONG totallen = (JLONG)datalen + remaining;
if (datalen >= APP0_DATA_LEN &&
- GETJOCTET(data[0]) == 0x4A &&
- GETJOCTET(data[1]) == 0x46 &&
- GETJOCTET(data[2]) == 0x49 &&
- GETJOCTET(data[3]) == 0x46 &&
- GETJOCTET(data[4]) == 0) {
+ data[0] == 0x4A &&
+ data[1] == 0x46 &&
+ data[2] == 0x49 &&
+ data[3] == 0x46 &&
+ data[4] == 0) {
/* Found JFIF APP0 marker: save info */
cinfo->saw_JFIF_marker = TRUE;
- cinfo->JFIF_major_version = GETJOCTET(data[5]);
- cinfo->JFIF_minor_version = GETJOCTET(data[6]);
- cinfo->density_unit = GETJOCTET(data[7]);
- cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
- cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+ cinfo->JFIF_major_version = data[5];
+ cinfo->JFIF_minor_version = data[6];
+ cinfo->density_unit = data[7];
+ cinfo->X_density = (data[8] << 8) + data[9];
+ cinfo->Y_density = (data[10] << 8) + data[11];
/* Check version.
* Major version must be 1, anything else signals an incompatible change.
* (We used to treat this as an error, but now it's a nonfatal warning,
@@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
/* Validate thumbnail dimensions and issue appropriate messages */
- if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
- TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
- GETJOCTET(data[12]), GETJOCTET(data[13]));
+ if (data[12] | data[13])
+ TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]);
totallen -= APP0_DATA_LEN;
- if (totallen !=
- ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3))
+ if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3))
TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen);
} else if (datalen >= 6 &&
- GETJOCTET(data[0]) == 0x4A &&
- GETJOCTET(data[1]) == 0x46 &&
- GETJOCTET(data[2]) == 0x58 &&
- GETJOCTET(data[3]) == 0x58 &&
- GETJOCTET(data[4]) == 0) {
+ data[0] == 0x4A &&
+ data[1] == 0x46 &&
+ data[2] == 0x58 &&
+ data[3] == 0x58 &&
+ data[4] == 0) {
/* Found JFIF "JFXX" extension APP0 marker */
/* The library doesn't actually do anything with these,
* but we try to produce a helpful trace message.
*/
- switch (GETJOCTET(data[5])) {
+ switch (data[5]) {
case 0x10:
TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen);
break;
@@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen);
break;
default:
- TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
- GETJOCTET(data[5]), (int)totallen);
+ TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen);
break;
}
} else {
@@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
unsigned int version, flags0, flags1, transform;
if (datalen >= APP14_DATA_LEN &&
- GETJOCTET(data[0]) == 0x41 &&
- GETJOCTET(data[1]) == 0x64 &&
- GETJOCTET(data[2]) == 0x6F &&
- GETJOCTET(data[3]) == 0x62 &&
- GETJOCTET(data[4]) == 0x65) {
+ data[0] == 0x41 &&
+ data[1] == 0x64 &&
+ data[2] == 0x6F &&
+ data[3] == 0x62 &&
+ data[4] == 0x65) {
/* Found Adobe APP14 marker */
- version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
- flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
- flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
- transform = GETJOCTET(data[11]);
+ version = (data[5] << 8) + data[6];
+ flags0 = (data[7] << 8) + data[8];
+ flags1 = (data[9] << 8) + data[10];
+ transform = data[11];
TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
cinfo->saw_Adobe_marker = TRUE;
cinfo->Adobe_transform = (UINT8)transform;
diff --git a/3rdparty/libjpeg-turbo/src/jdmaster.c b/3rdparty/libjpeg-turbo/src/jdmaster.c
index b20906438e49..cbc8774b1f2b 100644
--- a/3rdparty/libjpeg-turbo/src/jdmaster.c
+++ b/3rdparty/libjpeg-turbo/src/jdmaster.c
@@ -5,7 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
* Copyright (C) 2013, Linaro Limited.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -22,7 +22,6 @@
#include "jpeglib.h"
#include "jpegcomp.h"
#include "jdmaster.h"
-#include "jsimd.h"
/*
@@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo)
cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
return FALSE;
-#ifdef WITH_SIMD
- /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling
- isn't, then disabling merged upsampling is likely to be faster when
- decompressing YCbCr JPEG images. */
- if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() &&
- jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr &&
- (cinfo->out_color_space == JCS_RGB ||
- (cinfo->out_color_space >= JCS_EXT_RGB &&
- cinfo->out_color_space <= JCS_EXT_ARGB)))
- return FALSE;
-#endif
/* ??? also need to test for upsample-time rescaling, when & if supported */
return TRUE; /* by golly, it'll work... */
#else
@@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo)
*/
cinfo->master->first_iMCU_col = 0;
cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1;
+ cinfo->master->last_good_iMCU_row = 0;
#ifdef D_MULTISCAN_FILES_SUPPORTED
/* If jpeg_start_decompress will read the whole file, initialize
diff --git a/3rdparty/libjpeg-turbo/src/jdmrg565.c b/3rdparty/libjpeg-turbo/src/jdmrg565.c
index 53f1e1670006..980a4e216e4d 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrg565.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c
@@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr0);
+ y = *inptr0;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr0);
+ y = *inptr0;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
WRITE_TWO_PIXELS(outptr0, rgb);
outptr0 += 4;
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr00);
+ y = *inptr00;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr0 = (INT16)rgb;
- y = GETJSAMPLE(*inptr01);
+ y = *inptr01;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
WRITE_TWO_PIXELS(outptr0, rgb);
outptr0 += 4;
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];
d1 = DITHER_ROTATE(d1);
rgb = PACK_SHORT_565(r, g, b);
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];
@@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr00);
+ y = *inptr00;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr0 = (INT16)rgb;
- y = GETJSAMPLE(*inptr01);
+ y = *inptr01;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];
diff --git a/3rdparty/libjpeg-turbo/src/jdmrgext.c b/3rdparty/libjpeg-turbo/src/jdmrgext.c
index c9a44d8219c2..9bf4f1a307f3 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrgext.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c
@@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr[RGB_ALPHA] = 0xFF;
#endif
outptr += RGB_PIXELSIZE;
- y = GETJSAMPLE(*inptr0++);
+ y = *inptr0++;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr0);
+ y = *inptr0;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
- cb = GETJSAMPLE(*inptr1++);
- cr = GETJSAMPLE(*inptr2++);
+ cb = *inptr1++;
+ cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
@@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr0[RGB_ALPHA] = 0xFF;
#endif
outptr0 += RGB_PIXELSIZE;
- y = GETJSAMPLE(*inptr00++);
+ y = *inptr00++;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
@@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr0[RGB_ALPHA] = 0xFF;
#endif
outptr0 += RGB_PIXELSIZE;
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
@@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr1[RGB_ALPHA] = 0xFF;
#endif
outptr1 += RGB_PIXELSIZE;
- y = GETJSAMPLE(*inptr01++);
+ y = *inptr01++;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
@@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
- cb = GETJSAMPLE(*inptr1);
- cr = GETJSAMPLE(*inptr2);
+ cb = *inptr1;
+ cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
- y = GETJSAMPLE(*inptr00);
+ y = *inptr00;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
#ifdef RGB_ALPHA
outptr0[RGB_ALPHA] = 0xFF;
#endif
- y = GETJSAMPLE(*inptr01);
+ y = *inptr01;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
diff --git a/3rdparty/libjpeg-turbo/src/jdphuff.c b/3rdparty/libjpeg-turbo/src/jdphuff.c
index 9e82636bbd12..c6d82ca14b8c 100644
--- a/3rdparty/libjpeg-turbo/src/jdphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdphuff.c
@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2018, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -41,25 +41,6 @@ typedef struct {
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
-/* This macro is to work around compilers with missing or broken
- * structure assignment. You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src) ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
- ((dest).EOBRUN = (src).EOBRUN, \
- (dest).last_dc_val[0] = (src).last_dc_val[0], \
- (dest).last_dc_val[1] = (src).last_dc_val[1], \
- (dest).last_dc_val[2] = (src).last_dc_val[2], \
- (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
typedef struct {
struct jpeg_entropy_decoder pub; /* public fields */
@@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
boolean is_DC_band, bad;
int ci, coefi, tbl;
d_derived_tbl **pdtbl;
- int *coef_bit_ptr;
+ int *coef_bit_ptr, *prev_coef_bit_ptr;
jpeg_component_info *compptr;
is_DC_band = (cinfo->Ss == 0);
@@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
int cindex = cinfo->cur_comp_info[ci]->component_index;
coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+ prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0];
if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+ for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+ if (cinfo->input_scan_number > 1)
+ prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+ else
+ prev_coef_bit_ptr[coefi] = 0;
+ }
for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
if (cinfo->Ah != expected)
@@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
- ASSIGN_STATE(state, entropy->saved);
+ state = entropy->saved;
/* Outer loop handles each block in the MCU */
@@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
- ASSIGN_STATE(entropy->saved, state);
+ entropy->saved = state;
}
/* Account for restart interval (no-op if not using restarts) */
- entropy->restarts_to_go--;
+ if (cinfo->restart_interval)
+ entropy->restarts_to_go--;
return TRUE;
}
@@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
}
/* Account for restart interval (no-op if not using restarts) */
- entropy->restarts_to_go--;
+ if (cinfo->restart_interval)
+ entropy->restarts_to_go--;
return TRUE;
}
@@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
/* Account for restart interval (no-op if not using restarts) */
- entropy->restarts_to_go--;
+ if (cinfo->restart_interval)
+ entropy->restarts_to_go--;
return TRUE;
}
@@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
}
/* Account for restart interval (no-op if not using restarts) */
- entropy->restarts_to_go--;
+ if (cinfo->restart_interval)
+ entropy->restarts_to_go--;
return TRUE;
@@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
/* Create progression status table */
cinfo->coef_bits = (int (*)[DCTSIZE2])
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
- cinfo->num_components * DCTSIZE2 *
+ cinfo->num_components * 2 * DCTSIZE2 *
sizeof(int));
coef_bit_ptr = &cinfo->coef_bits[0][0];
for (ci = 0; ci < cinfo->num_components; ci++)
diff --git a/3rdparty/libjpeg-turbo/src/jdsample.c b/3rdparty/libjpeg-turbo/src/jdsample.c
index 50a68b301318..eaad72a03089 100644
--- a/3rdparty/libjpeg-turbo/src/jdsample.c
+++ b/3rdparty/libjpeg-turbo/src/jdsample.c
@@ -8,7 +8,7 @@
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, Google, Inc.
- * Copyright (C) 2019, Arm Limited.
+ * Copyright (C) 2019-2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
- invalue = *inptr++; /* don't need GETJSAMPLE() here */
+ invalue = *inptr++;
for (h = h_expand; h > 0; h--) {
*outptr++ = invalue;
}
@@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[inrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
- invalue = *inptr++; /* don't need GETJSAMPLE() here */
+ invalue = *inptr++;
*outptr++ = invalue;
*outptr++ = invalue;
}
@@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
- invalue = *inptr++; /* don't need GETJSAMPLE() here */
+ invalue = *inptr++;
*outptr++ = invalue;
*outptr++ = invalue;
}
@@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
inptr = input_data[inrow];
outptr = output_data[inrow];
/* Special case for first column */
- invalue = GETJSAMPLE(*inptr++);
+ invalue = *inptr++;
*outptr++ = (JSAMPLE)invalue;
- *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+ *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
/* General case: 3/4 * nearer pixel + 1/4 * further pixel */
- invalue = GETJSAMPLE(*inptr++) * 3;
- *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2);
- *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2);
+ invalue = (*inptr++) * 3;
+ *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
+ *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
}
/* Special case for last column */
- invalue = GETJSAMPLE(*inptr);
- *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+ invalue = *inptr;
+ *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
*outptr++ = (JSAMPLE)invalue;
}
}
@@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow++];
for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
- thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+ thiscolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
}
}
@@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow++];
/* Special case for first column */
- thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
- nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+ thiscolsum = (*inptr0++) * 3 + (*inptr1++);
+ nextcolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
lastcolsum = thiscolsum; thiscolsum = nextcolsum;
@@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
- nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+ nextcolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
lastcolsum = thiscolsum; thiscolsum = nextcolsum;
@@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo)
} else if (h_in_group == h_out_group &&
v_in_group * 2 == v_out_group && do_fancy) {
/* Non-fancy upsampling is handled by the generic method */
- upsample->methods[ci] = h1v2_fancy_upsample;
+#if defined(__arm__) || defined(__aarch64__) || \
+ defined(_M_ARM) || defined(_M_ARM64)
+ if (jsimd_can_h1v2_fancy_upsample())
+ upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
+ else
+#endif
+ upsample->methods[ci] = h1v2_fancy_upsample;
upsample->pub.need_context_rows = TRUE;
} else if (h_in_group * 2 == h_out_group &&
v_in_group * 2 == v_out_group) {
diff --git a/3rdparty/libjpeg-turbo/src/jerror.h b/3rdparty/libjpeg-turbo/src/jerror.h
index 933a3690fdf4..4476df2c934b 100644
--- a/3rdparty/libjpeg-turbo/src/jerror.h
+++ b/3rdparty/libjpeg-turbo/src/jerror.h
@@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
#endif
#endif
JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
#ifdef JMAKE_ENUM_LIST
@@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
(cinfo)->err->msg_parm.i[2] = (p3), \
(cinfo)->err->msg_parm.i[3] = (p4), \
(*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \
+ ((cinfo)->err->msg_code = (code), \
+ (cinfo)->err->msg_parm.i[0] = (p1), \
+ (cinfo)->err->msg_parm.i[1] = (p2), \
+ (cinfo)->err->msg_parm.i[2] = (p3), \
+ (cinfo)->err->msg_parm.i[3] = (p4), \
+ (cinfo)->err->msg_parm.i[4] = (p5), \
+ (cinfo)->err->msg_parm.i[5] = (p6), \
+ (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
#define ERREXITS(cinfo, code, str) \
((cinfo)->err->msg_code = (code), \
strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
diff --git a/3rdparty/libjpeg-turbo/src/jidctint.c b/3rdparty/libjpeg-turbo/src/jidctint.c
index 50f385da3329..bb0874801920 100644
--- a/3rdparty/libjpeg-turbo/src/jidctint.c
+++ b/3rdparty/libjpeg-turbo/src/jidctint.c
@@ -3,7 +3,7 @@
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
+ * Modification developed 2002-2018 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
/*
* Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x7 output block.
+ * producing a reduced-size 7x7 output block.
*
* Optimized algorithm with 12 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/14).
@@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
/*
* Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 11x11 output block.
+ * producing an 11x11 output block.
*
* Optimized algorithm with 24 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/22).
@@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
/* Add fudge factor here for final descale. */
- tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1);
+ tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
diff --git a/3rdparty/libjpeg-turbo/src/jmorecfg.h b/3rdparty/libjpeg-turbo/src/jmorecfg.h
index aa29f0f9f13e..fb3a9cf411cc 100644
--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@@ -43,25 +43,11 @@
#if BITS_IN_JSAMPLE == 8
/* JSAMPLE should be the smallest type that will hold the values 0..255.
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
*/
-#ifdef HAVE_UNSIGNED_CHAR
-
typedef unsigned char JSAMPLE;
#define GETJSAMPLE(value) ((int)(value))
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JSAMPLE;
-#ifdef __CHAR_UNSIGNED__
-#define GETJSAMPLE(value) ((int)(value))
-#else
-#define GETJSAMPLE(value) ((int)(value) & 0xFF)
-#endif /* __CHAR_UNSIGNED__ */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
#define MAXJSAMPLE 255
#define CENTERJSAMPLE 128
@@ -97,22 +83,9 @@ typedef short JCOEF;
* managers, this is also the data type passed to fread/fwrite.
*/
-#ifdef HAVE_UNSIGNED_CHAR
-
typedef unsigned char JOCTET;
#define GETJOCTET(value) (value)
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JOCTET;
-#ifdef __CHAR_UNSIGNED__
-#define GETJOCTET(value) (value)
-#else
-#define GETJOCTET(value) ((value) & 0xFF)
-#endif /* __CHAR_UNSIGNED__ */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
/* These typedefs are used for various table entries and so forth.
* They must be at least as wide as specified; but making them too big
@@ -123,15 +96,7 @@ typedef char JOCTET;
/* UINT8 must hold at least the values 0..255. */
-#ifdef HAVE_UNSIGNED_CHAR
typedef unsigned char UINT8;
-#else /* not HAVE_UNSIGNED_CHAR */
-#ifdef __CHAR_UNSIGNED__
-typedef char UINT8;
-#else /* not __CHAR_UNSIGNED__ */
-typedef short UINT8;
-#endif /* __CHAR_UNSIGNED__ */
-#endif /* HAVE_UNSIGNED_CHAR */
/* UINT16 must hold at least the values 0..65535. */
diff --git a/3rdparty/libjpeg-turbo/src/jpegint.h b/3rdparty/libjpeg-turbo/src/jpegint.h
index ad36ca8b5605..195fbcb9b675 100644
--- a/3rdparty/libjpeg-turbo/src/jpegint.h
+++ b/3rdparty/libjpeg-turbo/src/jpegint.h
@@ -5,7 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 1997-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, D. R. Commander.
+ * Copyright (C) 2015-2016, 2019, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@@ -158,6 +158,9 @@ struct jpeg_decomp_master {
JDIMENSION first_MCU_col[MAX_COMPONENTS];
JDIMENSION last_MCU_col[MAX_COMPONENTS];
boolean jinit_upsampler_no_alloc;
+
+ /* Last iMCU row that was successfully decoded */
+ JDIMENSION last_good_iMCU_row;
};
/* Input control module */
diff --git a/3rdparty/libjpeg-turbo/src/jquant1.c b/3rdparty/libjpeg-turbo/src/jquant1.c
index 40bbb28cc7f6..73b83e16e5cc 100644
--- a/3rdparty/libjpeg-turbo/src/jquant1.c
+++ b/3rdparty/libjpeg-turbo/src/jquant1.c
@@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
for (col = width; col > 0; col--) {
pixcode = 0;
for (ci = 0; ci < nc; ci++) {
- pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+ pixcode += colorindex[ci][*ptrin++];
}
*ptrout++ = (JSAMPLE)pixcode;
}
@@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
ptrin = input_buf[row];
ptrout = output_buf[row];
for (col = width; col > 0; col--) {
- pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
- pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
- pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+ pixcode = colorindex0[*ptrin++];
+ pixcode += colorindex1[*ptrin++];
+ pixcode += colorindex2[*ptrin++];
*ptrout++ = (JSAMPLE)pixcode;
}
}
@@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* required amount of padding.
*/
*output_ptr +=
- colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]];
+ colorindex_ci[*input_ptr + dither[col_index]];
input_ptr += nc;
output_ptr++;
col_index = (col_index + 1) & ODITHER_MASK;
@@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
col_index = 0;
for (col = width; col > 0; col--) {
- pixcode =
- GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]);
- pixcode +=
- GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]);
- pixcode +=
- GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]);
+ pixcode = colorindex0[(*input_ptr++) + dither0[col_index]];
+ pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
+ pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
*output_ptr++ = (JSAMPLE)pixcode;
col_index = (col_index + 1) & ODITHER_MASK;
}
@@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* The maximum error is +- MAXJSAMPLE; this sets the required size
* of the range_limit array.
*/
- cur += GETJSAMPLE(*input_ptr);
- cur = GETJSAMPLE(range_limit[cur]);
+ cur += *input_ptr;
+ cur = range_limit[cur];
/* Select output value, accumulate into output code for this pixel */
- pixcode = GETJSAMPLE(colorindex_ci[cur]);
+ pixcode = colorindex_ci[cur];
*output_ptr += (JSAMPLE)pixcode;
/* Compute actual representation error at this pixel */
/* Note: we can do this even though we don't have the final */
/* pixel code, because the colormap is orthogonal. */
- cur -= GETJSAMPLE(colormap_ci[pixcode]);
+ cur -= colormap_ci[pixcode];
/* Compute error fractions to be propagated to adjacent pixels.
* Add these into the running sums, and simultaneously shift the
* next-line error sums left by 1 column.
diff --git a/3rdparty/libjpeg-turbo/src/jquant2.c b/3rdparty/libjpeg-turbo/src/jquant2.c
index 6570613bb9f2..44efb18cadf1 100644
--- a/3rdparty/libjpeg-turbo/src/jquant2.c
+++ b/3rdparty/libjpeg-turbo/src/jquant2.c
@@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
ptr = input_buf[row];
for (col = width; col > 0; col--) {
/* get pixel value and index into the histogram */
- histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
- [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
- [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+ histp = &histogram[ptr[0] >> C0_SHIFT]
+ [ptr[1] >> C1_SHIFT]
+ [ptr[2] >> C2_SHIFT];
/* increment, check for overflow and undo increment if so. */
if (++(*histp) <= 0)
(*histp)--;
@@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
for (i = 0; i < numcolors; i++) {
/* We compute the squared-c0-distance term, then add in the other two. */
- x = GETJSAMPLE(cinfo->colormap[0][i]);
+ x = cinfo->colormap[0][i];
if (x < minc0) {
tdist = (x - minc0) * C0_SCALE;
min_dist = tdist * tdist;
@@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
}
}
- x = GETJSAMPLE(cinfo->colormap[1][i]);
+ x = cinfo->colormap[1][i];
if (x < minc1) {
tdist = (x - minc1) * C1_SCALE;
min_dist += tdist * tdist;
@@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
}
}
- x = GETJSAMPLE(cinfo->colormap[2][i]);
+ x = cinfo->colormap[2][i];
if (x < minc2) {
tdist = (x - minc2) * C2_SCALE;
min_dist += tdist * tdist;
@@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
#define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE)
for (i = 0; i < numcolors; i++) {
- icolor = GETJSAMPLE(colorlist[i]);
+ icolor = colorlist[i];
/* Compute (square of) distance from minc0/c1/c2 to this color */
- inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+ inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
dist0 = inc0 * inc0;
- inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+ inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
dist0 += inc1 * inc1;
- inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+ inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
dist0 += inc2 * inc2;
/* Form the initial difference increments */
inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
@@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
cachep = &histogram[c0 + ic0][c1 + ic1][c2];
for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
- *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1);
+ *cachep++ = (histcell)((*cptr++) + 1);
}
}
}
@@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[row];
for (col = width; col > 0; col--) {
/* get pixel value and index into the cache */
- c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
- c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
- c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+ c0 = (*inptr++) >> C0_SHIFT;
+ c1 = (*inptr++) >> C1_SHIFT;
+ c2 = (*inptr++) >> C2_SHIFT;
cachep = &histogram[c0][c1][c2];
/* If we have not seen this color before, find nearest colormap entry */
/* and update the cache */
@@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* The maximum error is +- MAXJSAMPLE (or less with error limiting);
* this sets the required size of the range_limit array.
*/
- cur0 += GETJSAMPLE(inptr[0]);
- cur1 += GETJSAMPLE(inptr[1]);
- cur2 += GETJSAMPLE(inptr[2]);
- cur0 = GETJSAMPLE(range_limit[cur0]);
- cur1 = GETJSAMPLE(range_limit[cur1]);
- cur2 = GETJSAMPLE(range_limit[cur2]);
+ cur0 += inptr[0];
+ cur1 += inptr[1];
+ cur2 += inptr[2];
+ cur0 = range_limit[cur0];
+ cur1 = range_limit[cur1];
+ cur2 = range_limit[cur2];
/* Index into the cache with adjusted pixel value */
cachep =
&histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT];
@@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
register int pixcode = *cachep - 1;
*outptr = (JSAMPLE)pixcode;
/* Compute representation error for this pixel */
- cur0 -= GETJSAMPLE(colormap0[pixcode]);
- cur1 -= GETJSAMPLE(colormap1[pixcode]);
- cur2 -= GETJSAMPLE(colormap2[pixcode]);
+ cur0 -= colormap0[pixcode];
+ cur1 -= colormap1[pixcode];
+ cur2 -= colormap2[pixcode];
}
/* Compute error fractions to be propagated to adjacent pixels.
* Add these into the running sums, and simultaneously shift the
diff --git a/3rdparty/libjpeg-turbo/src/jsimd.h b/3rdparty/libjpeg-turbo/src/jsimd.h
index 51e2b8c89de3..6c203655ef84 100644
--- a/3rdparty/libjpeg-turbo/src/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/jsimd.h
@@ -4,6 +4,7 @@
* Copyright 2009 Pierre Ossman for Cendio AB
* Copyright (C) 2011, 2014, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
+EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
jpeg_component_info *compptr,
@@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
jpeg_component_info *compptr,
JSAMPARRAY input_data,
JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
+ jpeg_component_info *compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY *output_data_ptr);
EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
diff --git a/3rdparty/libjpeg-turbo/src/jsimd_none.c b/3rdparty/libjpeg-turbo/src/jsimd_none.c
index 3cb6c80f8aab..5b38a9fb5c99 100644
--- a/3rdparty/libjpeg-turbo/src/jsimd_none.c
+++ b/3rdparty/libjpeg-turbo/src/jsimd_none.c
@@ -4,6 +4,7 @@
* Copyright 2009 Pierre Ossman for Cendio AB
* Copyright (C) 2009-2011, 2014, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void)
return 0;
}
+GLOBAL(int)
+jsimd_can_h1v2_fancy_upsample(void)
+{
+ return 0;
+}
+
GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
@@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
{
}
+GLOBAL(void)
+jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+ JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+{
+}
+
GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)
{
diff --git a/3rdparty/libjpeg-turbo/src/jversion.h b/3rdparty/libjpeg-turbo/src/jversion.h
index 4462b941048d..2ab534af4147 100644
--- a/3rdparty/libjpeg-turbo/src/jversion.h
+++ b/3rdparty/libjpeg-turbo/src/jversion.h
@@ -2,9 +2,9 @@
* jversion.h
*
* This file was part of the Independent JPEG Group's software:
- * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
* libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2020, D. R. Commander.
+ * Copyright (C) 2010, 2012-2021, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -37,9 +37,9 @@
*/
#define JCOPYRIGHT \
- "Copyright (C) 2009-2020 D. R. Commander\n" \
+ "Copyright (C) 2009-2021 D. R. Commander\n" \
"Copyright (C) 2015, 2020 Google, Inc.\n" \
- "Copyright (C) 2019 Arm Limited\n" \
+ "Copyright (C) 2019-2020 Arm Limited\n" \
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015 Intel Corporation\n" \
@@ -48,7 +48,7 @@
"Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
- "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
+ "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT_SHORT \
- "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
+ "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"
diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt
index 80ab0b86ab76..9160e2024ca0 100644
--- a/3rdparty/libwebp/CMakeLists.txt
+++ b/3rdparty/libwebp/CMakeLists.txt
@@ -32,7 +32,9 @@ endif()
# Define the library target:
# ----------------------------------------------------------------------------------
-add_definitions(-DWEBP_USE_THREAD)
+if(NOT OPENCV_DISABLE_THREAD_SUPPORT)
+ add_definitions(-DWEBP_USE_THREAD)
+endif()
add_library(${WEBP_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs})
if(ANDROID)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7bd4bf5f0dcc..dd862bb1549e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -467,6 +467,7 @@ OCV_OPTION(BUILD_ANDROID_SERVICE "Build OpenCV Manager for Google Play" OFF I
OCV_OPTION(BUILD_CUDA_STUBS "Build CUDA modules stubs when no CUDA SDK" OFF IF (NOT APPLE_FRAMEWORK) )
OCV_OPTION(BUILD_JAVA "Enable Java support" (ANDROID OR NOT CMAKE_CROSSCOMPILING) IF (ANDROID OR (NOT APPLE_FRAMEWORK AND NOT WINRT)) )
OCV_OPTION(BUILD_OBJC "Enable Objective-C support" ON IF APPLE_FRAMEWORK )
+OCV_OPTION(BUILD_KOTLIN_EXTENSIONS "Build Kotlin extensions (Android)" ON IF ANDROID )
# OpenCV installation options
# ===================================================
@@ -510,10 +511,11 @@ OCV_OPTION(CV_TRACE "Enable OpenCV code trace" ON)
OCV_OPTION(OPENCV_GENERATE_SETUPVARS "Generate setup_vars* scripts" ON IF (NOT ANDROID AND NOT APPLE_FRAMEWORK) )
OCV_OPTION(ENABLE_CONFIG_VERIFICATION "Fail build if actual configuration doesn't match requested (WITH_XXX != HAVE_XXX)" OFF)
OCV_OPTION(OPENCV_ENABLE_MEMALIGN "Enable posix_memalign or memalign usage" ON)
+OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF)
+OCV_OPTION(OPENCV_DISABLE_THREAD_SUPPORT "Build the library without multi-threaded code." OFF)
OCV_OPTION(ENABLE_PYLINT "Add target with Pylint checks" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
OCV_OPTION(ENABLE_FLAKE8 "Add target with Python flake8 checker" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
-OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF)
if(ENABLE_IMPL_COLLECTION)
add_definitions(-DCV_COLLECT_IMPL_DATA)
@@ -665,6 +667,11 @@ if(UNIX)
set(HAVE_PTHREAD 1)
endif()
+ # Ensure that libpthread is not listed as one of the libraries to pass to the linker.
+ if (OPENCV_DISABLE_THREAD_SUPPORT)
+ list(REMOVE_ITEM OPENCV_LINKER_LIBS pthread)
+ endif()
+
if(OPENCV_ENABLE_MEMALIGN)
CHECK_SYMBOL_EXISTS(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN)
CHECK_INCLUDE_FILE(malloc.h HAVE_MALLOC_H)
@@ -1458,6 +1465,15 @@ ocv_build_features_string(parallel_status EXCLUSIVE
ELSE "none")
status("")
status(" Parallel framework:" "${parallel_status}")
+if (OPENCV_DISABLE_THREAD_SUPPORT)
+ status("" "Multi thread code explicitly disabled with OPENCV_DISABLE_THREAD_SUPPORT.")
+ if(HAVE_PTHREADS_PF OR HAVE_HPX OR HAVE_OPENMP OR HAVE_GCD OR HAVE_CONCURRENCY)
+ message(FATAL_ERROR "Not all parallel frameworks have been disabled (using ${parallel_status}).")
+ endif()
+ if(HAVE_PTHREAD)
+ message(FATAL_ERROR "Thread execution might be in use in some component.")
+ endif()
+endif()
if(CV_TRACE OR OPENCV_TRACE)
ocv_build_features_string(trace_status EXCLUSIVE
diff --git a/apps/model-diagnostics/model_diagnostics.cpp b/apps/model-diagnostics/model_diagnostics.cpp
index 2ffeaa1ea5b9..d3934577aec6 100644
--- a/apps/model-diagnostics/model_diagnostics.cpp
+++ b/apps/model-diagnostics/model_diagnostics.cpp
@@ -1,6 +1,6 @@
/*************************************************
USAGE:
-./model_diagnostics -m <onnx model location>
+./model_diagnostics -m <model file location>
**************************************************/
#include <opencv2/dnn.hpp>
#include <iostream>
@@ -32,7 +32,7 @@ static std::string checkFileExists(const std::string& fileName)
}
std::string diagnosticKeys =
- "{ model m | | Path to the model .onnx file. }"
+ "{ model m | | Path to the model file. }"
"{ config c | | Path to the model configuration file. }"
"{ framework f | | [Optional] Name of the model framework. }";
@@ -41,7 +41,7 @@ std::string diagnosticKeys =
int main( int argc, const char** argv )
{
CommandLineParser argParser(argc, argv, diagnosticKeys);
- argParser.about("Use this tool to run the diagnostics of provided ONNX model"
+ argParser.about("Use this tool to run the diagnostics of provided ONNX/TF model"
"to obtain the information about its support (supported layers).");
if (argc == 1)
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index a161b6eb8b64..2917dd33d5ee 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -178,8 +178,17 @@ if(CV_GCC OR CV_CLANG)
add_extra_compiler_option(-Wno-long-long)
endif()
- # We need pthread's
- if(UNIX AND NOT ANDROID AND NOT (APPLE AND CV_CLANG)) # TODO
+ # We need pthread's, unless we have explicitly disabled multi-thread execution.
+ if(NOT OPENCV_DISABLE_THREAD_SUPPORT
+ AND (
+ (UNIX
+ AND NOT ANDROID
+ AND NOT (APPLE AND CV_CLANG)
+ AND NOT EMSCRIPTEN
+ )
+ OR (EMSCRIPTEN AND WITH_PTHREADS_PF) # https://github.com/opencv/opencv/issues/20285
+ )
+ ) # TODO
add_extra_compiler_option(-pthread)
endif()
diff --git a/cmake/OpenCVDetectHalide.cmake b/cmake/OpenCVDetectHalide.cmake
index 790f69205662..4828c299aead 100644
--- a/cmake/OpenCVDetectHalide.cmake
+++ b/cmake/OpenCVDetectHalide.cmake
@@ -9,9 +9,14 @@ set(HALIDE_ROOT_DIR "${HALIDE_ROOT_DIR}" CACHE PATH "Halide root directory")
if(NOT HAVE_HALIDE)
find_package(Halide QUIET) # Try CMake-based config files
if(Halide_FOUND)
- set(HALIDE_INCLUDE_DIRS "${Halide_INCLUDE_DIRS}" CACHE PATH "Halide include directories" FORCE)
- set(HALIDE_LIBRARIES "${Halide_LIBRARIES}" CACHE PATH "Halide libraries" FORCE)
- set(HAVE_HALIDE TRUE)
+ if(TARGET Halide::Halide) # modern Halide scripts defines imported target
+ set(HALIDE_INCLUDE_DIRS "")
+ set(HALIDE_LIBRARIES "Halide::Halide")
+ set(HAVE_HALIDE TRUE)
+ else()
+ # using HALIDE_INCLUDE_DIRS / Halide_LIBRARIES
+ set(HAVE_HALIDE TRUE)
+ endif()
endif()
endif()
@@ -28,18 +33,15 @@ if(NOT HAVE_HALIDE AND HALIDE_ROOT_DIR)
)
if(HALIDE_LIBRARY AND HALIDE_INCLUDE_DIR)
# TODO try_compile
- set(HALIDE_INCLUDE_DIRS "${HALIDE_INCLUDE_DIR}" CACHE PATH "Halide include directories" FORCE)
- set(HALIDE_LIBRARIES "${HALIDE_LIBRARY}" CACHE PATH "Halide libraries" FORCE)
+ set(HALIDE_INCLUDE_DIRS "${HALIDE_INCLUDE_DIR}")
+ set(HALIDE_LIBRARIES "${HALIDE_LIBRARY}")
set(HAVE_HALIDE TRUE)
endif()
- if(NOT HAVE_HALIDE)
- ocv_clear_vars(HALIDE_LIBRARIES HALIDE_INCLUDE_DIRS CACHE)
- endif()
endif()
if(HAVE_HALIDE)
- include_directories(${HALIDE_INCLUDE_DIRS})
+ if(HALIDE_INCLUDE_DIRS)
+ include_directories(${HALIDE_INCLUDE_DIRS})
+ endif()
list(APPEND OPENCV_LINKER_LIBS ${HALIDE_LIBRARIES})
-else()
- ocv_clear_vars(HALIDE_INCLUDE_DIRS HALIDE_LIBRARIES)
endif()
diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake
index 216c02c3ccc2..b9fd07bbfbb1 100644
--- a/cmake/OpenCVDetectInferenceEngine.cmake
+++ b/cmake/OpenCVDetectInferenceEngine.cmake
@@ -134,12 +134,21 @@ endif()
# Add more features to the target
if(INF_ENGINE_TARGET)
- if(NOT INF_ENGINE_RELEASE)
- message(WARNING "InferenceEngine version has not been set, 2021.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+ if(DEFINED InferenceEngine_VERSION)
+ message(STATUS "InferenceEngine: ${InferenceEngine_VERSION}")
+ if(NOT INF_ENGINE_RELEASE AND NOT (InferenceEngine_VERSION VERSION_LESS "2021.4"))
+ math(EXPR INF_ENGINE_RELEASE_INIT "${InferenceEngine_VERSION_MAJOR} * 1000000 + ${InferenceEngine_VERSION_MINOR} * 10000 + ${InferenceEngine_VERSION_PATCH} * 100")
+ endif()
+ endif()
+ if(NOT INF_ENGINE_RELEASE AND NOT INF_ENGINE_RELEASE_INIT)
+ message(WARNING "InferenceEngine version has not been set, 2021.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+ set(INF_ENGINE_RELEASE_INIT "2021040000")
+ elseif(DEFINED INF_ENGINE_RELEASE)
+ set(INF_ENGINE_RELEASE_INIT "${INF_ENGINE_RELEASE}")
endif()
- set(INF_ENGINE_RELEASE "2021030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
+ set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
- INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
+ INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
)
endif()
diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake
index e3593d4dc9b3..c8ec55b58864 100644
--- a/cmake/OpenCVFindLibsGUI.cmake
+++ b/cmake/OpenCVFindLibsGUI.cmake
@@ -2,16 +2,7 @@
# Detect 3rd-party GUI libraries
# ----------------------------------------------------------------------------
-#--- Win32 UI ---
-ocv_clear_vars(HAVE_WIN32UI)
-if(WITH_WIN32UI)
- try_compile(HAVE_WIN32UI
- "${OpenCV_BINARY_DIR}"
- "${OpenCV_SOURCE_DIR}/cmake/checks/win32uitest.cpp"
- CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=user32;gdi32")
-endif()
-
-# --- QT4 ---
+# --- QT4/5 ---
ocv_clear_vars(HAVE_QT HAVE_QT5)
if(WITH_QT)
if(NOT WITH_QT EQUAL 4)
@@ -34,41 +25,6 @@ if(WITH_QT)
endif()
endif()
-# --- GTK ---
-ocv_clear_vars(HAVE_GTK HAVE_GTK3 HAVE_GTHREAD HAVE_GTKGLEXT)
-if(WITH_GTK AND NOT HAVE_QT)
- if(NOT WITH_GTK_2_X)
- ocv_check_modules(GTK3 gtk+-3.0)
- if(HAVE_GTK3)
- ocv_append_build_options(HIGHGUI GTK3)
- set(HAVE_GTK TRUE)
- endif()
- endif()
- if(NOT HAVE_GTK)
- ocv_check_modules(GTK2 gtk+-2.0)
- if(HAVE_GTK2)
- if (GTK2_VERSION VERSION_LESS MIN_VER_GTK)
- message (FATAL_ERROR "GTK support requires a minimum version of ${MIN_VER_GTK} (${GTK2_VERSION} found)")
- else()
- ocv_append_build_options(HIGHGUI GTK2)
- set(HAVE_GTK TRUE)
- endif()
- endif()
- endif()
- ocv_check_modules(GTHREAD gthread-2.0)
- if(HAVE_GTK AND NOT HAVE_GTHREAD)
- message(FATAL_ERROR "gthread not found. This library is required when building with GTK support")
- else()
- ocv_append_build_options(HIGHGUI GTHREAD)
- endif()
- if(WITH_OPENGL AND NOT HAVE_GTK3)
- ocv_check_modules(GTKGLEXT gtkglext-1.0)
- if(HAVE_GTKGLEXT)
- ocv_append_build_options(HIGHGUI GTKGLEXT)
- endif()
- endif()
-endif()
-
# --- OpenGl ---
ocv_clear_vars(HAVE_OPENGL HAVE_QT_OPENGL)
if(WITH_OPENGL)
diff --git a/cmake/OpenCVMinDepVersions.cmake b/cmake/OpenCVMinDepVersions.cmake
index ce0c0ba8165c..db225e2ab5b4 100644
--- a/cmake/OpenCVMinDepVersions.cmake
+++ b/cmake/OpenCVMinDepVersions.cmake
@@ -6,4 +6,3 @@ set(MIN_VER_CUDNN 7.5)
set(MIN_VER_PYTHON2 2.7)
set(MIN_VER_PYTHON3 3.2)
set(MIN_VER_ZLIB 1.2.3)
-set(MIN_VER_GTK 2.18.0)
diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 0e783dfec68e..7c48aad9c295 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -1183,6 +1183,9 @@ function(ocv_add_perf_tests)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
+ if(TARGET opencv_highgui_plugins)
+ add_dependencies(${the_target} opencv_highgui_plugins)
+ endif()
if(HAVE_HPX)
message("Linking HPX to Perf test of module ${name}")
@@ -1278,6 +1281,9 @@ function(ocv_add_accuracy_tests)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
+ if(TARGET opencv_highgui_plugins)
+ add_dependencies(${the_target} opencv_highgui_plugins)
+ endif()
if(HAVE_HPX)
message("Linking HPX to Perf test of module ${name}")
@@ -1368,6 +1374,9 @@ function(ocv_add_samples)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
+ if(TARGET opencv_highgui_plugins)
+ add_dependencies(${the_target} opencv_highgui_plugins)
+ endif()
if(INSTALL_BIN_EXAMPLES)
install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${module_id}" COMPONENT samples)
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index 252078bdf776..39445150a911 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -1973,3 +1973,9 @@ if(NOT BUILD_SHARED_LIBS AND (CMAKE_VERSION VERSION_LESS "3.14.0"))
else()
ocv_update(OPENCV_3RDPARTY_EXCLUDE_FROM_ALL "EXCLUDE_FROM_ALL")
endif()
+
+
+#
+# Include configuration override settings
+#
+include(cmake/vars/EnableModeVars.cmake)
diff --git a/cmake/android/android_gradle_projects.cmake b/cmake/android/android_gradle_projects.cmake
index 2e34a20d97a7..e07d26b5bbd9 100644
--- a/cmake/android/android_gradle_projects.cmake
+++ b/cmake/android/android_gradle_projects.cmake
@@ -2,6 +2,17 @@
set(ANDROID_GRADLE_PLUGIN_VERSION "3.2.1" CACHE STRING "Android Gradle Plugin version")
message(STATUS "Android Gradle Plugin version: ${ANDROID_GRADLE_PLUGIN_VERSION}")
+set(KOTLIN_PLUGIN_VERSION "1.4.10" CACHE STRING "Kotlin Plugin version")
+message(STATUS "Kotlin Plugin version: ${KOTLIN_PLUGIN_VERSION}")
+
+if(BUILD_KOTLIN_EXTENSIONS)
+ set(KOTLIN_PLUGIN_DECLARATION "apply plugin: 'kotlin-android'" CACHE STRING "Kotlin Plugin declaration")
+ set(KOTLIN_STD_LIB "implementation 'org.jetbrains.kotlin:kotlin-stdlib:${KOTLIN_PLUGIN_VERSION}'" CACHE STRING "Kotlin Standard Library dependency")
+else()
+ set(KOTLIN_PLUGIN_DECLARATION "" CACHE STRING "Kotlin Plugin declaration")
+ set(KOTLIN_STD_LIB "" CACHE STRING "Kotlin Standard Library dependency")
+endif()
+
set(GRADLE_VERSION "5.6.4" CACHE STRING "Gradle version")
message(STATUS "Gradle version: ${GRADLE_VERSION}")
diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
index c0f073604bc8..6439d8b43f06 100644
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -28,9 +28,6 @@
/* Clp support */
#cmakedefine HAVE_CLP
-/* Cocoa API */
-#cmakedefine HAVE_COCOA
-
/* NVIDIA CUDA Runtime API*/
#cmakedefine HAVE_CUDA
@@ -56,12 +53,6 @@
/* Geospatial Data Abstraction Library */
#cmakedefine HAVE_GDAL
-/* GTK+ 2.0 Thread support */
-#cmakedefine HAVE_GTHREAD
-
-/* GTK+ 2.x toolkit */
-#cmakedefine HAVE_GTK
-
/* Halide support */
#cmakedefine HAVE_HALIDE
@@ -121,12 +112,6 @@
/* parallel_for with pthreads */
#cmakedefine HAVE_PTHREADS_PF
-/* Qt support */
-#cmakedefine HAVE_QT
-
-/* Qt OpenGL support */
-#cmakedefine HAVE_QT_OPENGL
-
/* Intel Threading Building Blocks */
#cmakedefine HAVE_TBB
@@ -136,9 +121,6 @@
/* TIFF codec */
#cmakedefine HAVE_TIFF
-/* Win32 UI */
-#cmakedefine HAVE_WIN32UI
-
/* Define if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
#cmakedefine WORDS_BIGENDIAN
diff --git a/cmake/vars/EnableModeVars.cmake b/cmake/vars/EnableModeVars.cmake
new file mode 100644
index 000000000000..b3c4e79c46d1
--- /dev/null
+++ b/cmake/vars/EnableModeVars.cmake
@@ -0,0 +1,18 @@
+set(__OCV_MODE_VARS_DIR "${CMAKE_CURRENT_LIST_DIR}")
+
+macro(ocv_change_mode_var)
+ set(__var "${ARGV0}")
+ set(__mode "${ARGV1}")
+ set(__value "${ARGV2}")
+ if(__mode STREQUAL "MODIFIED_ACCESS" AND __value)
+ if(NOT __applied_mode_${__var})
+ include("${__OCV_MODE_VARS_DIR}/${__var}.cmake")
+ set(__applied_mode_${__var} 1)
+ else()
+ #message("Mode is already applied: ${__var}")
+ endif()
+ endif()
+endmacro()
+
+variable_watch(OPENCV_DISABLE_THREAD_SUPPORT ocv_change_mode_var)
+set(OPENCV_DISABLE_THREAD_SUPPORT "${OPENCV_DISABLE_THREAD_SUPPORT}")
diff --git a/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake b/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake
new file mode 100644
index 000000000000..5f5fc0204dfc
--- /dev/null
+++ b/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake
@@ -0,0 +1,28 @@
+# Force removal of code conditionally compiled with `#if
+# HAVE_PTHREAD`.
+ocv_update(HAVE_PTHREAD 0)
+
+# These components are disabled because they require
+# multi-threaded execution.
+ocv_update(WITH_PROTOBUF OFF)
+ocv_update(WITH_GSTREAMER OFF)
+ocv_update(WITH_IPP OFF)
+ocv_update(WITH_ITT OFF)
+ocv_update(WITH_OPENCL OFF)
+ocv_update(WITH_VA OFF)
+ocv_update(WITH_VA_INTEL OFF)
+
+# Disable bindings
+ocv_update(BUILD_opencv_python2 OFF)
+ocv_update(BUILD_opencv_python3 OFF)
+ocv_update(BUILD_JAVA OFF)
+ocv_update(BUILD_opencv_java OFF)
+
+# These modules require `#include
+# <[thread|mutex|condition_variable|future]>` and linkage into
+# `libpthread` to work.
+ocv_update(BUILD_opencv_objdetect OFF)
+ocv_update(BUILD_opencv_gapi OFF)
+ocv_update(BUILD_opencv_dnn OFF)
+
+set(OPJ_USE_THREAD "OFF" CACHE INTERNAL "")
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 26ad42b1e5fb..ce207d3e318f 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -106,7 +106,7 @@ RECURSIVE = YES
EXCLUDE = @CMAKE_DOXYGEN_EXCLUDE_LIST@
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp *.m */opencl/runtime/* */legacy/* *_c.h @DOXYGEN_EXCLUDE_PATTERNS@
-EXCLUDE_SYMBOLS = cv::DataType<*> cv::traits::* int void CV__* T __CV*
+EXCLUDE_SYMBOLS = cv::DataType<*> cv::traits::* int void CV__* T __CV* cv::gapi::detail*
EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@
EXAMPLE_PATTERNS = *
EXAMPLE_RECURSIVE = YES
diff --git a/doc/opencv.bib b/doc/opencv.bib
index d44b0f5293e7..d0661e8d5f0b 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -850,12 +850,12 @@ @article{Park94
journal = {IEEE Transactions on Robotics and Automation},
title = {Robot sensor calibration: solving AX=XB on the Euclidean group},
year = {1994},
+ month = oct,
volume = {10},
number = {5},
pages = {717-721},
doi = {10.1109/70.326576},
- ISSN = {1042-296X},
- month = {Oct}
+ issn = {1042-296X}
}
@inproceedings{PM03,
author = {P{\'e}rez, Patrick and Gangnet, Michel and Blake, Andrew},
@@ -1051,12 +1051,12 @@ @article{Tsai89
journal = {IEEE Transactions on Robotics and Automation},
title = {A new technique for fully autonomous and efficient 3D robotics hand/eye calibration},
year = {1989},
+ month = jun,
volume = {5},
number = {3},
pages = {345-358},
doi = {10.1109/70.34770},
- ISSN = {1042-296X},
- month = {June}
+ issn = {1042-296X}
}
@inproceedings{UES01,
author = {Uyttendaele, Matthew and Eden, Ashley and Skeliski, R},
@@ -1324,3 +1324,13 @@ @inproceedings{zhou2017east
pages={5551--5560},
year={2017}
}
+@article{umeyama1991least,
+ title={Least-squares estimation of transformation parameters between two point patterns},
+ author={Umeyama, Shinji},
+ journal={IEEE Computer Architecture Letters},
+ volume={13},
+ number={04},
+ pages={376--380},
+ year={1991},
+ publisher={IEEE Computer Society}
+}
diff --git a/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown b/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
index e24e692087c5..60e5686934d7 100644
--- a/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
+++ b/doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
@@ -40,12 +40,12 @@ using **cv.Sobel()**).
Then comes the main part. After this, they created a score, basically an equation, which
determines if a window can contain a corner or not.
-\f[R = det(M) - k(trace(M))^2\f]
+\f[R = \det(M) - k(\operatorname{trace}(M))^2\f]
where
- - \f$det(M) = \lambda_1 \lambda_2\f$
- - \f$trace(M) = \lambda_1 + \lambda_2\f$
- - \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of M
+ - \f$\det(M) = \lambda_1 \lambda_2\f$
+ - \f$\operatorname{trace}(M) = \lambda_1 + \lambda_2\f$
+ - \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of \f$M\f$
So the magnitudes of these eigenvalues decide whether a region is a corner, an edge, or flat.
diff --git a/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown b/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
index 1229581ce685..c5d29493e403 100644
--- a/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
+++ b/doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
@@ -20,7 +20,7 @@ Harris Corner Detector. The scoring function in Harris Corner Detector was given
Instead of this, Shi-Tomasi proposed:
-\f[R = min(\lambda_1, \lambda_2)\f]
+\f[R = \min(\lambda_1, \lambda_2)\f]
If it is a greater than a threshold value, it is considered as a corner. If we plot it in
\f$\lambda_1 - \lambda_2\f$ space as we did in Harris Corner Detector, we get an image as below:
@@ -28,7 +28,7 @@ If it is a greater than a threshold value, it is considered as a corner. If we p
![image](images/shitomasi_space.png)
From the figure, you can see that only when \f$\lambda_1\f$ and \f$\lambda_2\f$ are above a minimum value,
-\f$\lambda_{min}\f$, it is considered as a corner(green region).
+\f$\lambda_{\min}\f$, it is considered as a corner(green region).
Code
----
diff --git a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
index dee4df774ae8..bbbae6a3e6c8 100644
--- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
+++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
@@ -156,7 +156,7 @@ sift = cv.SIFT_create()
kp, des = sift.detectAndCompute(gray,None)
@endcode
Here kp will be a list of keypoints and des is a numpy array of shape
-\f$Number\_of\_Keypoints \times 128\f$.
+\f$\text{(Number of Keypoints)} \times 128\f$.
So we got keypoints, descriptors etc. Now we want to see how to match keypoints in different images.
That we will learn in coming chapters.
diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown
index cea8d0c39f11..58b4ed55ca41 100644
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@@ -396,13 +396,14 @@ There are multiple less popular frameworks which can be used to read and write v
### videoio plugins
-Some _videoio_ backends can be built as plugins thus breaking strict dependency on third-party libraries and making them optional at runtime. Following options can be used to control this mechanism:
+Since version 4.1.0 some _videoio_ backends can be built as plugins thus breaking strict dependency on third-party libraries and making them optional at runtime. Following options can be used to control this mechanism:
| Option | Default | Description |
| --------| ------ | ------- |
| `VIDEOIO_ENABLE_PLUGINS` | _ON_ | Enable or disable plugins completely. |
| `VIDEOIO_PLUGIN_LIST` | _empty_ | Comma- or semicolon-separated list of backend names to be compiled as plugins. Supported names are _ffmpeg_, _gstreamer_, _msmf_, _mfx_ and _all_. |
-| `VIDEOIO_ENABLE_STRICT_PLUGIN_CHECK` | _ON_ | Enable strict runtime version check to only allow plugins built with the same version of OpenCV. |
+
+Check @ref tutorial_general_install for standalone plugins build instructions.
## Parallel processing {#tutorial_config_reference_func_core}
@@ -421,6 +422,17 @@ Some of OpenCV algorithms can use multithreading to accelerate processing. OpenC
@note OpenCV can download and build TBB library from GitHub, this functionality can be enabled with the `BUILD_TBB` option.
+### Threading plugins
+
+Since version 4.5.2 OpenCV supports dynamically loaded threading backends. At this moment only separate compilation process is supported: first you have to build OpenCV with some _default_ parallel backend (e.g. pthreads), then build each plugin and copy resulting binaries to the _lib_ or _bin_ folder.
+
+| Option | Default | Description |
+| ------ | ------- | ----------- |
+| PARALLEL_ENABLE_PLUGINS | ON | Enable plugin support, if this option is disabled OpenCV will not try to load anything |
+
+Check @ref tutorial_general_install for standalone plugins build instructions.
+
+
## GUI backends (highgui module) {#tutorial_config_reference_highgui}
OpenCV relies on various GUI libraries for window drawing.
@@ -442,6 +454,18 @@ OpenCV relies on various GUI libraries for window drawing.
OpenGL integration can be used to draw HW-accelerated windows with following backends: GTK, WIN32 and Qt. And enables basic interoperability with OpenGL, see @ref core_opengl and @ref highgui_opengl for details.
+### highgui plugins
+
+Since OpenCV 4.5.3 GTK backend can be built as a dynamically loaded plugin. Following options can be used to control this mechanism:
+
+| Option | Default | Description |
+| --------| ------ | ------- |
+| `HIGHGUI_ENABLE_PLUGINS` | _ON_ | Enable or disable plugins completely. |
+| `HIGHGUI_PLUGIN_LIST` | _empty_ | Comma- or semicolon-separated list of backend names to be compiled as plugins. Supported names are _gtk_, _gtk2_, _gtk3_, and _all_. |
+
+Check @ref tutorial_general_install for standalone plugins build instructions.
+
+
## Deep learning neural networks inference backends and options (dnn module) {#tutorial_config_reference_dnn}
OpenCV have own DNN inference module which have own build-in engine, but can also use other libraries for optimized processing. Multiple backends can be enabled in single build. Selection happens at runtime automatically or manually.
diff --git a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
index 4908771aec0d..749356063547 100644
--- a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
+++ b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown
@@ -46,14 +46,14 @@ Open your Doxyfile using your favorite text editor and search for the key
`TAGFILES`. Change it as follows:
@code
-TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/4.5.2
+TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/4.5.3
@endcode
If you had other definitions already, you can append the line using a `\`:
@code
TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \
- ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/4.5.2
+ ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/4.5.3
@endcode
Doxygen can now use the information from the tag file to link to the OpenCV
diff --git a/doc/tutorials/introduction/general_install/general_install.markdown b/doc/tutorials/introduction/general_install/general_install.markdown
index e8c93f430eea..7b0c5d2b068a 100644
--- a/doc/tutorials/introduction/general_install/general_install.markdown
+++ b/doc/tutorials/introduction/general_install/general_install.markdown
@@ -105,7 +105,7 @@ cmake --build
make
```
-## Step 3: Install {#tutorial_general_install_sources_4}
+## (optional) Step 3: Install {#tutorial_general_install_sources_4}
During installation procedure build results and other files from build directory will be copied to the install location. Default installation location is `/usr/local` on UNIX and `C:/Program Files` on Windows. This location can be changed at the configuration step by setting `CMAKE_INSTALL_PREFIX` option. To perform installation run the following command:
```
@@ -117,3 +117,32 @@ This step is optional, OpenCV can be used directly from the build directory.
@note
If the installation root location is a protected system directory, so the installation process must be run with superuser or administrator privileges (e.g. `sudo cmake ...`).
+
+
+## (optional) Step 4: Build plugins {#tutorial_general_install_plugins_4}
+
+It is possible to decouple some of OpenCV dependencies and make them optional by extracting parts of the code into dynamically-loaded plugins. It helps to produce adaptive binary distributions which can work on systems with less dependencies and extend functionality just by installing missing libraries. For now modules _core_, _videoio_ and _highgui_ support this mechanism for some of their dependencies. In some cases it is possible to build plugins together with OpenCV by setting options like `VIDEOIO_PLUGIN_LIST` or `HIGHGUI_PLUGIN_LIST`, more options related to this scenario can be found in the @ref tutorial_config_reference. In other cases plugins should be built separately in their own build procedure and this section describes such standalone build process.
+
+@note It is recommended to use compiler, configuration and build options which are compatible to the one used for OpenCV build, otherwise resulting library can refuse to load or cause other runtime problems. Note that some functionality can be limited or work slower when backends are loaded dynamically due to extra barrier between OpenCV and corresponding third-party library.
+
+Build procedure is similar to the main OpenCV build, but you have to use special CMake projects located in corresponding subdirectories, these folders can also contain reference scripts and Docker images. It is important to use `opencv_<module>_<backend>` name prefix for plugins so that loader is able to find them. Each supported prefix can be used to load only one library, however multiple candidates can be probed for a single prefix. For example, you can have _libopencv_videoio_ffmpeg_3.so_ and _libopencv_videoio_ffmpeg_4.so_ plugins and the first one which can be loaded successfully will occupy internal slot and stop probing process. Possible prefixes and project locations are presented in the table below:
+
+| module | backends | location |
+| ------ | -------- | -------- |
+| core | parallel_tbb, parallel_onetbb, parallel_openmp | _opencv/modules/core/misc/plugins_ |
+| highgui | gtk, gtk2, gtk3 | _opencv/modules/highgui/misc/plugins_ |
+| videoio | ffmpeg, gstreamer, intel_mfx, msmf | _opencv/modules/videoio/misc_ |
+
+Example:
+```.sh
+# set-up environment for TBB detection, for example:
+# export TBB_DIR=<dir with TBBConfig.cmake>
+cmake -G <generator> \
+    -DOPENCV_PLUGIN_NAME=opencv_core_tbb_<suffix> \
+    -DOPENCV_PLUGIN_DESTINATION=<destination directory> \
+    -DCMAKE_BUILD_TYPE=<config> \
+    <opencv source dir>/modules/core/misc/plugins/parallel_tbb
+cmake --build . --config <config>
+```
+
+@note On Windows plugins must be linked with existing OpenCV build. Set `OpenCV_DIR` environment or CMake variable to the directory with _OpenCVConfig.cmake_ file, it can be OpenCV build directory or some path in the location where you performed installation.
diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index ce79a33405a4..4928df65d1f1 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -748,7 +748,7 @@ CV_EXPORTS_W Mat findHomography(InputArray srcPoints, InputArray dstPoints, Outp
@param Qz Optional output 3x3 rotation matrix around z-axis.
The function computes a RQ decomposition using the given rotations. This function is used in
-decomposeProjectionMatrix to decompose the left 3x3 submatrix of a projection matrix into a camera
+#decomposeProjectionMatrix to decompose the left 3x3 submatrix of a projection matrix into a camera
and a rotation matrix.
It optionally returns three rotation matrices, one for each axis, and the three Euler angles in
@@ -802,7 +802,7 @@ CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray
The function computes partial derivatives of the elements of the matrix product \f$A*B\f$ with regard to
the elements of each of the two input matrices. The function is used to compute the Jacobian
-matrices in stereoCalibrate but can also be used in any other similar optimization function.
+matrices in #stereoCalibrate but can also be used in any other similar optimization function.
*/
CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB );
@@ -831,7 +831,7 @@ where \f$\mathrm{rodrigues}\f$ denotes a rotation vector to a rotation matrix tr
\f$\mathrm{rodrigues}^{-1}\f$ denotes the inverse transformation. See Rodrigues for details.
Also, the functions can compute the derivatives of the output vectors with regards to the input
-vectors (see matMulDeriv ). The functions are used inside stereoCalibrate but can also be used in
+vectors (see matMulDeriv ). The functions are used inside #stereoCalibrate but can also be used in
your own code where Levenberg-Marquardt or another gradient-based solver is used to optimize a
function that contains a matrix multiplication.
*/
@@ -1052,7 +1052,7 @@ a 3D point expressed in the world frame into the camera frame:
arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of
modules/calib3d/src/solvepnp.cpp version 2.4.9)
- The P3P algorithm requires image points to be in an array of shape (N,1,2) due
- to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
+ to its calling of #undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
which requires 2-channel information.
- Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
@@ -1257,7 +1257,7 @@ vectors, respectively, and further optimizes them.
- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections imagePoints and the projected (using
-projectPoints ) objectPoints .
+ #projectPoints ) objectPoints .
- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
@@ -1393,7 +1393,7 @@ a 3D point expressed in the world frame into the camera frame:
arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of
modules/calib3d/src/solvepnp.cpp version 2.4.9)
- The P3P algorithm requires image points to be in an array of shape (N,1,2) due
- to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
+ to its calling of #undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
which requires 2-channel information.
- Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
@@ -1426,7 +1426,7 @@ CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoint
@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern
coordinate space. In the old interface all the per-view vectors are concatenated. See
-calibrateCamera for details.
+#calibrateCamera for details.
@param imagePoints Vector of vectors of the projections of the calibration pattern points. In the
old interface all the per-view vectors are concatenated.
@param imageSize Image size in pixels used to initialize the principal point.
@@ -1520,7 +1520,7 @@ Each entry stands for one corner of the pattern and can have one of the followin
- 3 = left-top corner of a black cell with a white marker dot
- 4 = left-top corner of a white cell with a black marker dot (pattern origin in case of markers otherwise first corner)
-The function is analog to findchessboardCorners but uses a localized radon
+The function is analogous to #findChessboardCorners but uses a localized Radon
transformation approximated by box filters being more robust to all sort of
noise, faster on larger images and is able to directly return the sub-pixel
position of the internal chessboard corners. The Method is based on the paper
@@ -1570,7 +1570,7 @@ and should be below ~3.0 pixels.
@param image Gray image used to find chessboard corners
@param patternSize Size of a found chessboard pattern
-@param corners Corners found by findChessboardCorners(SB)
+@param corners Corners found by #findChessboardCornersSB
@param rise_distance Rise distance 0.8 means 10% ... 90% of the final signal strength
@param vertical By default edge responses for horizontal lines are calculated
@param sharpness Optional output array with a sharpness value for calculated edge responses (see description)
@@ -1598,9 +1598,9 @@ CV_EXPORTS_W bool find4QuadCornerSubpix( InputArray img, InputOutputArray corner
@param image Destination image. It must be an 8-bit color image.
@param patternSize Number of inner corners per a chessboard row and column
(patternSize = cv::Size(points_per_row,points_per_column)).
-@param corners Array of detected corners, the output of findChessboardCorners.
+@param corners Array of detected corners, the output of #findChessboardCorners.
@param patternWasFound Parameter indicating whether the complete board was found or not. The
-return value of findChessboardCorners should be passed here.
+return value of #findChessboardCorners should be passed here.
The function draws individual chessboard corners detected either as red circles if the board was not
found, or as colored corners connected with lines if the board was found.
@@ -1837,21 +1837,21 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
-This function is an extension of calibrateCamera() with the method of releasing object which was
+This function is an extension of #calibrateCamera with the method of releasing object which was
proposed in @cite strobl2011iccv. In many common cases with inaccurate, unmeasured, roughly planar
targets (calibration plates), this method can dramatically improve the precision of the estimated
camera parameters. Both the object-releasing method and standard method are supported by this
function. Use the parameter **iFixedPoint** for method selection. In the internal implementation,
-calibrateCamera() is a wrapper for this function.
+#calibrateCamera is a wrapper for this function.
@param objectPoints Vector of vectors of calibration pattern points in the calibration pattern
-coordinate space. See calibrateCamera() for details. If the method of releasing object to be used,
+coordinate space. See #calibrateCamera for details. If the method of releasing object to be used,
the identical calibration board must be used in each view and it must be fully visible, and all
objectPoints[i] must be the same and all points should be roughly close to a plane. **The calibration
target has to be rigid, or at least static if the camera (rather than the calibration target) is
shifted for grabbing images.**
@param imagePoints Vector of vectors of the projections of calibration pattern points. See
-calibrateCamera() for details.
+#calibrateCamera for details.
@param imageSize Size of the image used only to initialize the intrinsic camera matrix.
@param iFixedPoint The index of the 3D object point in objectPoints[0] to be fixed. It also acts as
a switch for calibration method selection. If object-releasing method to be used, pass in the
@@ -1861,9 +1861,9 @@ board grid is recommended to be fixed when object-releasing method being utilize
\cite strobl2011iccv, two other points are also fixed. In this implementation, objectPoints[0].front
and objectPoints[0].back.z are used. With object-releasing method, accurate rvecs, tvecs and
newObjPoints are only possible if coordinates of these three fixed points are accurate enough.
-@param cameraMatrix Output 3x3 floating-point camera matrix. See calibrateCamera() for details.
-@param distCoeffs Output vector of distortion coefficients. See calibrateCamera() for details.
-@param rvecs Output vector of rotation vectors estimated for each pattern view. See calibrateCamera()
+@param cameraMatrix Output 3x3 floating-point camera matrix. See #calibrateCamera for details.
+@param distCoeffs Output vector of distortion coefficients. See #calibrateCamera for details.
+@param rvecs Output vector of rotation vectors estimated for each pattern view. See #calibrateCamera
for details.
@param tvecs Output vector of translation vectors estimated for each pattern view.
@param newObjPoints The updated output vector of calibration pattern points. The coordinates might
@@ -1871,15 +1871,15 @@ be scaled based on three fixed points. The returned coordinates are accurate onl
mentioned three fixed points are accurate. If not needed, noArray() can be passed in. This parameter
is ignored with standard calibration method.
@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
-See calibrateCamera() for details.
+See #calibrateCamera for details.
@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
-See calibrateCamera() for details.
+See #calibrateCamera for details.
@param stdDeviationsObjPoints Output vector of standard deviations estimated for refined coordinates
of calibration pattern points. It has the same size and order as objectPoints[0] vector. This
parameter is ignored with standard calibration method.
@param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
@param flags Different flags that may be zero or a combination of some predefined values. See
-calibrateCamera() for details. If the method of releasing object is used, the calibration time may
+#calibrateCamera for details. If the method of releasing object is used, the calibration time may
be much longer. CALIB_USE_QR or CALIB_USE_LU could be used for faster calibration with potentially
less precise and less stable in some rare cases.
@param criteria Termination criteria for the iterative optimization algorithm.
@@ -1888,7 +1888,7 @@ less precise and less stable in some rare cases.
The function estimates the intrinsic camera parameters and extrinsic parameters for each of the
views. The algorithm is based on @cite Zhang2000, @cite BouguetMCT and @cite strobl2011iccv. See
-calibrateCamera() for other detailed explanations.
+#calibrateCamera for other detailed explanations.
@sa
calibrateCamera, findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort
*/
@@ -1915,8 +1915,8 @@ CV_EXPORTS_W double calibrateCameraRO( InputArrayOfArrays objectPoints,
/** @brief Computes useful camera characteristics from the camera intrinsic matrix.
-@param cameraMatrix Input camera intrinsic matrix that can be estimated by calibrateCamera or
-stereoCalibrate .
+@param cameraMatrix Input camera intrinsic matrix that can be estimated by #calibrateCamera or
+#stereoCalibrate .
@param imageSize Input image size in pixels.
@param apertureWidth Physical width in mm of the sensor.
@param apertureHeight Physical height in mm of the sensor.
@@ -2051,13 +2051,13 @@ Besides the stereo-related information, the function can also perform a full cal
the two cameras. However, due to the high dimensionality of the parameter space and noise in the
input data, the function can diverge from the correct solution. If the intrinsic parameters can be
estimated with high accuracy for each of the cameras individually (for example, using
-calibrateCamera ), you are recommended to do so and then pass @ref CALIB_FIX_INTRINSIC flag to the
+#calibrateCamera ), you are recommended to do so and then pass @ref CALIB_FIX_INTRINSIC flag to the
function along with the computed intrinsic parameters. Otherwise, if all the parameters are
estimated at once, it makes sense to restrict some parameters, for example, pass
@ref CALIB_SAME_FOCAL_LENGTH and @ref CALIB_ZERO_TANGENT_DIST flags, which is usually a
reasonable assumption.
-Similarly to calibrateCamera, the function minimizes the total re-projection error for all the
+Similarly to #calibrateCamera, the function minimizes the total re-projection error for all the
points in all the available views from both cameras. The function returns the final value of the
re-projection error.
*/
@@ -2117,7 +2117,7 @@ pixels from the original images from the cameras are retained in the rectified i
image pixels are lost). Any intermediate value yields an intermediate result between
those two extreme cases.
@param newImageSize New image resolution after rectification. The same size should be passed to
-initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
+#initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
is passed (default), it is set to the original imageSize . Setting it to a larger value can help you
preserve details in the original image, especially when there is a big radial distortion.
@param validPixROI1 Optional output rectangles inside the rectified images where all the pixels
@@ -2129,7 +2129,7 @@ are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are lik
The function computes the rotation matrices for each camera that (virtually) make both camera image
planes the same plane. Consequently, this makes all the epipolar lines parallel and thus simplifies
-the dense stereo correspondence problem. The function takes the matrices computed by stereoCalibrate
+the dense stereo correspondence problem. The function takes the matrices computed by #stereoCalibrate
as input. As output, it provides two rotation matrices and also two projection matrices in the new
coordinates. The function distinguishes the following two cases:
@@ -2173,7 +2173,7 @@ coordinates. The function distinguishes the following two cases:
@ref CALIB_ZERO_DISPARITY is set.
As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera
-matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to
+matrices. The matrices, together with R1 and R2 , can then be passed to #initUndistortRectifyMap to
initialize the rectification map for each camera.
See below the screenshot from the stereo_calib.cpp sample. Some red horizontal lines pass through
@@ -2196,9 +2196,9 @@ CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs
@param points1 Array of feature points in the first image.
@param points2 The corresponding points in the second image. The same formats as in
-findFundamentalMat are supported.
+#findFundamentalMat are supported.
@param F Input fundamental matrix. It can be computed from the same set of point pairs using
-findFundamentalMat .
+#findFundamentalMat .
@param imgSize Size of the image.
@param H1 Output rectification homography matrix for the first image.
@param H2 Output rectification homography matrix for the second image.
@@ -2209,7 +2209,7 @@ rejected prior to computing the homographies. Otherwise, all the points are cons
The function computes the rectification transformations without knowing intrinsic parameters of the
cameras and their relative position in the space, which explains the suffix "uncalibrated". Another
-related difference from stereoRectify is that the function outputs not the rectification
+related difference from #stereoRectify is that the function outputs not the rectification
transformations in the object (3D) space, but the planar perspective transformations encoded by the
homography matrices H1 and H2 . The function implements the algorithm @cite Hartley99 .
@@ -2218,8 +2218,8 @@ homography matrices H1 and H2 . The function implements the algorithm @cite Hart
depends on the epipolar geometry. Therefore, if the camera lenses have a significant distortion,
it would be better to correct it before computing the fundamental matrix and calling this
function. For example, distortion coefficients can be estimated for each head of stereo camera
- separately by using calibrateCamera . Then, the images can be corrected using undistort , or
- just the point coordinates can be corrected with undistortPoints .
+ separately by using #calibrateCamera . Then, the images can be corrected using #undistort , or
+ just the point coordinates can be corrected with #undistortPoints .
*/
CV_EXPORTS_W bool stereoRectifyUncalibrated( InputArray points1, InputArray points2,
InputArray F, Size imgSize,
@@ -2247,10 +2247,10 @@ assumed.
@param imageSize Original image size.
@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are
valid) and 1 (when all the source image pixels are retained in the undistorted image). See
-stereoRectify for details.
+#stereoRectify for details.
@param newImgSize Image size after rectification. By default, it is set to imageSize .
@param validPixROI Optional output rectangle that outlines all-good-pixels region in the
-undistorted image. See roi1, roi2 description in stereoRectify .
+undistorted image. See roi1, roi2 description in #stereoRectify .
@param centerPrincipalPoint Optional flag that indicates whether in the new camera intrinsic matrix the
principal point should be at the image center or not. By default, the principal point is chosen to
best fit a subset of the source image (determined by alpha) to the corrected image.
@@ -2262,7 +2262,7 @@ image pixels if there is valuable information in the corners alpha=1 , or get so
When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to
"virtual" pixels outside of the captured distorted image. The original camera intrinsic matrix, distortion
coefficients, the computed new camera intrinsic matrix, and newImageSize should be passed to
-initUndistortRectifyMap to produce the maps for remap .
+#initUndistortRectifyMap to produce the maps for #remap .
*/
CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs,
Size imageSize, double alpha, Size newImgSize = Size(),
@@ -2591,7 +2591,7 @@ CV_EXPORTS_W void convertPointsFromHomogeneous( InputArray src, OutputArray dst
@param dst Output vector of 2D, 3D, or 4D points.
The function converts 2D or 3D points from/to homogeneous coordinates by calling either
-convertPointsToHomogeneous or convertPointsFromHomogeneous.
+#convertPointsToHomogeneous or #convertPointsFromHomogeneous.
@note The function is obsolete. Use one of the previous two functions instead.
*/
@@ -2630,7 +2630,7 @@ matrices sequentially).
The calculated fundamental matrix may be passed further to computeCorrespondEpilines that finds the
epipolar lines corresponding to the specified points. It can also be passed to
-stereoRectifyUncalibrated to compute the rectification transformation. :
+#stereoRectifyUncalibrated to compute the rectification transformation. :
@code
// Example. Estimation of fundamental matrix using the RANSAC algorithm
int point_count = 100;
@@ -2675,7 +2675,7 @@ be floating-point (single or double precision).
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera intrinsic matrix. If this assumption does not hold for your use case, use
-`undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points
+#undistortPoints with `P = cv::NoArray()` for both cameras to transform image points
to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When
passing these coordinates, pass the identity matrix for this parameter.
@param method Method for computing an essential matrix.
@@ -2698,7 +2698,7 @@ This function estimates essential matrix based on the five-point algorithm solve
where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the
second images, respectively. The result of this function may be passed further to
-decomposeEssentialMat or recoverPose to recover the relative pose between cameras.
+#decomposeEssentialMat or #recoverPose to recover the relative pose between cameras.
*/
CV_EXPORTS_W
Mat findEssentialMat(
@@ -2773,13 +2773,13 @@ be floating-point (single or double precision).
@param cameraMatrix1 Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera matrix. If this assumption does not hold for your use case, use
-`undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points
+#undistortPoints with `P = cv::NoArray()` for both cameras to transform image points
to normalized image coordinates, which are valid for the identity camera matrix. When
passing these coordinates, pass the identity matrix for this parameter.
@param cameraMatrix2 Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera matrix. If this assumption does not hold for your use case, use
-`undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points
+#undistortPoints with `P = cv::NoArray()` for both cameras to transform image points
to normalized image coordinates, which are valid for the identity camera matrix. When
passing these coordinates, pass the identity matrix for this parameter.
@param distCoeffs1 Input vector of distortion coefficients
@@ -2807,7 +2807,7 @@ This function estimates essential matrix based on the five-point algorithm solve
where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the
second images, respectively. The result of this function may be passed further to
-decomposeEssentialMat or recoverPose to recover the relative pose between cameras.
+#decomposeEssentialMat or #recoverPose to recover the relative pose between cameras.
*/
CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
InputArray cameraMatrix1, InputArray distCoeffs1,
@@ -2869,7 +2869,7 @@ possible pose hypotheses by doing cheirality check. The cheirality check means t
triangulated 3D points should have positive depth. Some details can be found in @cite Nister03.
This function can be used to process the output E and mask from @ref findEssentialMat. In this
-scenario, points1 and points2 are the same input for findEssentialMat.:
+scenario, points1 and points2 are the same input for #findEssentialMat :
@code
// Example. Estimation of fundamental matrix using the RANSAC algorithm
int point_count = 100;
@@ -2964,14 +2964,14 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point
@param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV_32FC2 or
vector\<Point2f\> .
@param whichImage Index of the image (1 or 2) that contains the points .
-@param F Fundamental matrix that can be estimated using findFundamentalMat or stereoRectify .
+@param F Fundamental matrix that can be estimated using #findFundamentalMat or #stereoRectify .
@param lines Output vector of the epipolar lines corresponding to the points in the other image.
Each line \f$ax + by + c=0\f$ is encoded by 3 numbers \f$(a, b, c)\f$ .
For every point in one of the two images of a stereo pair, the function finds the equation of the
corresponding epipolar line in the other image.
-From the fundamental matrix definition (see findFundamentalMat ), line \f$l^{(2)}_i\f$ in the second
+From the fundamental matrix definition (see #findFundamentalMat ), line \f$l^{(2)}_i\f$ in the second
image for the point \f$p^{(1)}_i\f$ in the first image (when whichImage=1 ) is computed as:
\f[l^{(2)}_i = F p^{(1)}_i\f]
@@ -3047,7 +3047,7 @@ CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal,
int maxSpeckleSize, double maxDiff,
InputOutputArray buf = noArray() );
-//! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify())
+//! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by #stereoRectify)
CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2,
int minDisparity, int numberOfDisparities,
int blockSize );
@@ -3112,7 +3112,7 @@ sd( \texttt{pt1} , \texttt{pt2} )=
((\texttt{F}^t \cdot \texttt{pt2})(0))^2 +
((\texttt{F}^t \cdot \texttt{pt2})(1))^2}
\f]
-The fundamental matrix may be calculated using the cv::findFundamentalMat function. See @cite HartleyZ00 11.4.3 for details.
+The fundamental matrix may be calculated using the #findFundamentalMat function. See @cite HartleyZ00 11.4.3 for details.
@param pt1 first homogeneous 2d point
@param pt2 second homogeneous 2d point
@param F fundamental matrix
@@ -3172,6 +3172,33 @@ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst,
OutputArray out, OutputArray inliers,
double ransacThreshold = 3, double confidence = 0.99);
+/** @brief Computes an optimal affine transformation between two 3D point sets.
+
+It computes \f$R,s,t\f$ minimizing \f$\sum_{i} \| dst_i - s \cdot R \cdot src_i - t \|^2 \f$
+where \f$R\f$ is a 3x3 rotation matrix, \f$t\f$ is a 3x1 translation vector and \f$s\f$ is a
+scalar size value. This is an implementation of the algorithm by Umeyama \cite umeyama1991least .
+The estimated affine transform has a homogeneous scale which is a subclass of affine
+transformations with 7 degrees of freedom. The paired point sets need to comprise at least 3
+points each.
+
+@param src First input 3D point set.
+@param dst Second input 3D point set.
+@param scale If null is passed, the scale parameter s will be assumed to be 1.0.
+Else the pointed-to variable will be set to the optimal scale.
+@param force_rotation If true, the returned rotation will never be a reflection.
+This might be unwanted, e.g. when optimizing a transform between a right- and a
+left-handed coordinate system.
+@return 3D affine transformation matrix \f$3 \times 4\f$ of the form
+\f[T =
+\begin{bmatrix}
+R & t\\
+\end{bmatrix}
+\f]
+
+ */
+CV_EXPORTS_W cv::Mat estimateAffine3D(InputArray src, InputArray dst,
+ CV_OUT double* scale = nullptr, bool force_rotation = true);
+
/** @brief Computes an optimal translation between two 3D point sets.
*
* It computes
@@ -3379,10 +3406,10 @@ CV_EXPORTS_W int decomposeHomographyMat(InputArray H,
@param beforePoints Vector of (rectified) visible reference points before the homography is applied
@param afterPoints Vector of (rectified) visible reference points after the homography is applied
@param possibleSolutions Vector of int indices representing the viable solution set after filtering
-@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the findHomography function
+@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the #findHomography function
-This function is intended to filter the output of the decomposeHomographyMat based on additional
-information as described in @cite Malis . The summary of the method: the decomposeHomographyMat function
+This function is intended to filter the output of the #decomposeHomographyMat based on additional
+information as described in @cite Malis . The summary of the method: the #decomposeHomographyMat function
returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the
sets of points visible in the camera frame before and after the homography transformation is applied,
we can determine which are the true potential solutions and which are the opposites by verifying which
@@ -3620,7 +3647,7 @@ CV_EXPORTS_W void undistort( InputArray src, OutputArray dst,
/** @brief Computes the undistortion and rectification transformation map.
The function computes the joint undistortion and rectification transformation and represents the
-result in the form of maps for remap. The undistorted image looks like original, as if it is
+result in the form of maps for #remap. The undistorted image looks like original, as if it is
captured with a camera using the camera matrix =newCameraMatrix and zero distortion. In case of a
monocular camera, newCameraMatrix is usually equal to cameraMatrix, or it can be computed by
#getOptimalNewCameraMatrix for a better control over scaling. In case of a stereo camera,
@@ -3630,7 +3657,7 @@ Also, this new camera is oriented differently in the coordinate space, according
example, helps to align two heads of a stereo camera so that the epipolar lines on both images
become horizontal and have the same y- coordinate (in case of a horizontally aligned stereo camera).
-The function actually builds the maps for the inverse mapping algorithm that is used by remap. That
+The function actually builds the maps for the inverse mapping algorithm that is used by #remap. That
is, for each pixel \f$(u, v)\f$ in the destination (corrected and rectified) image, the function
computes the corresponding coordinates in the source image (that is, in the original image from
camera). The following process is applied:
@@ -3658,7 +3685,7 @@ where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x
are the distortion coefficients.
In case of a stereo camera, this function is called twice: once for each camera head, after
-stereoRectify, which in its turn is called after #stereoCalibrate. But if the stereo camera
+#stereoRectify, which in its turn is called after #stereoCalibrate. But if the stereo camera
was not calibrated, it is still possible to compute the rectification transformations directly from
the fundamental matrix using #stereoRectifyUncalibrated. For each camera, the function computes
homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D
@@ -3684,6 +3711,77 @@ void initUndistortRectifyMap(InputArray cameraMatrix, InputArray distCoeffs,
InputArray R, InputArray newCameraMatrix,
Size size, int m1type, OutputArray map1, OutputArray map2);
+/** @brief Computes the projection and inverse-rectification transformation map. In essence, this is the inverse of
+#initUndistortRectifyMap to accommodate stereo-rectification of projectors ('inverse-cameras') in projector-camera pairs.
+
+The function computes the joint projection and inverse rectification transformation and represents the
+result in the form of maps for #remap. The projected image looks like a distorted version of the original which,
+once projected by a projector, should visually match the original. In case of a monocular camera, newCameraMatrix
+is usually equal to cameraMatrix, or it can be computed by
+#getOptimalNewCameraMatrix for a better control over scaling. In case of a projector-camera pair,
+newCameraMatrix is normally set to P1 or P2 computed by #stereoRectify .
+
+The projector is oriented differently in the coordinate space, according to R. In case of projector-camera pairs,
+this helps align the projector (in the same manner as #initUndistortRectifyMap for the camera) to create a stereo-rectified pair. This
+allows epipolar lines on both images to become horizontal and have the same y-coordinate (in case of a horizontally aligned projector-camera pair).
+
+The function builds the maps for the inverse mapping algorithm that is used by #remap. That
+is, for each pixel \f$(u, v)\f$ in the destination (projected and inverse-rectified) image, the function
+computes the corresponding coordinates in the source image (that is, in the original digital image). The following process is applied:
+
+\f[
+\begin{array}{l}
+\text{newCameraMatrix}\\
+x \leftarrow (u - {c'}_x)/{f'}_x \\
+y \leftarrow (v - {c'}_y)/{f'}_y \\
+
+\\\text{Undistortion}
+\\\scriptsize{\textit{though equation shown is for radial undistortion, function implements cv::undistortPoints()}}\\
+r^2 \leftarrow x^2 + y^2 \\
+\theta \leftarrow \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6}\\
+x' \leftarrow \frac{x}{\theta} \\
+y' \leftarrow \frac{y}{\theta} \\
+
+\\\text{Rectification}\\
+{[X\,Y\,W]} ^T \leftarrow R*[x' \, y' \, 1]^T \\
+x'' \leftarrow X/W \\
+y'' \leftarrow Y/W \\
+
+\\\text{cameraMatrix}\\
+map_x(u,v) \leftarrow x'' f_x + c_x \\
+map_y(u,v) \leftarrow y'' f_y + c_y
+\end{array}
+\f]
+where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+are the distortion coefficients vector distCoeffs.
+
+In case of a stereo-rectified projector-camera pair, this function is called for the projector while #initUndistortRectifyMap is called for the camera head.
+This is done after #stereoRectify, which in turn is called after #stereoCalibrate. If the projector-camera pair
+is not calibrated, it is still possible to compute the rectification transformations directly from
+the fundamental matrix using #stereoRectifyUncalibrated. For the projector and camera, the function computes
+homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D
+space. R can be computed from H as
+\f[\texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}\f]
+where cameraMatrix can be chosen arbitrarily.
+
+@param cameraMatrix Input camera matrix \f$A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
+@param R Optional rectification transformation in the object space (3x3 matrix). R1 or R2,
+computed by #stereoRectify can be passed here. If the matrix is empty, the identity transformation
+is assumed.
+@param newCameraMatrix New camera matrix \f$A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}\f$.
+@param size Distorted image size.
+@param m1type Type of the first output map. Can be CV_32FC1, CV_32FC2 or CV_16SC2, see #convertMaps
+@param map1 The first output map for #remap.
+@param map2 The second output map for #remap.
+ */
+CV_EXPORTS_W
+void initInverseRectificationMap( InputArray cameraMatrix, InputArray distCoeffs,
+ InputArray R, InputArray newCameraMatrix,
+ const Size& size, int m1type, OutputArray map1, OutputArray map2 );
+
//! initializes maps for #remap for wide-angle
CV_EXPORTS
float initWideAngleProjMap(InputArray cameraMatrix, InputArray distCoeffs,
@@ -3730,7 +3828,7 @@ Mat getDefaultNewCameraMatrix(InputArray cameraMatrix, Size imgsize = Size(),
The function is similar to #undistort and #initUndistortRectifyMap but it operates on a
sparse set of points instead of a raster image. Also the function performs a reverse transformation
-to projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a
+to #projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a
planar object, it does, up to a translation vector, if the proper R is specified.
For each observed point coordinate \f$(u, v)\f$ the function computes:
@@ -3840,7 +3938,7 @@ namespace fisheye
@param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\ .
Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity.
- This means if you want to transform back points undistorted with undistortPoints() you have to
+ This means if you want to transform back points undistorted with #fisheye::undistortPoints you have to
multiply them with \f$P^{-1}\f$.
*/
CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0);
@@ -3859,7 +3957,7 @@ namespace fisheye
CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted,
InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray());
- /** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero
+ /** @brief Computes undistortion and rectification maps for image transform by #remap. If D is empty zero
distortion is used, if R or P is empty identity matrixes are used.
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
@@ -3868,7 +3966,7 @@ namespace fisheye
1-channel or 1x1 3-channel
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
@param size Undistorted image size.
- @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
+ @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See #convertMaps
for details.
@param map1 The first output map.
@param map2 The second output map.
@@ -3888,14 +3986,14 @@ namespace fisheye
The function transforms an image to compensate radial and tangential lens distortion.
- The function is simply a combination of fisheye::initUndistortRectifyMap (with unity R ) and remap
+ The function is simply a combination of #fisheye::initUndistortRectifyMap (with unity R ) and #remap
(with bilinear interpolation). See the former function for details of the transformation being
performed.
See below the results of undistortImage.
- a\) result of undistort of perspective camera model (all possible coefficients (k_1, k_2, k_3,
k_4, k_5, k_6) of distortion were optimized under calibration)
- - b\) result of fisheye::undistortImage of fisheye camera model (all possible coefficients (k_1, k_2,
+ - b\) result of #fisheye::undistortImage of fisheye camera model (all possible coefficients (k_1, k_2,
k_3, k_4) of fisheye distortion were optimized under calibration)
- c\) original image was captured with fisheye lens
@@ -3985,7 +4083,7 @@ optimization. It is the \f$max(width,height)/\pi\f$ or the provided \f$f_x\f$, \
horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
useful image area.
@param newImageSize New image resolution after rectification. The same size should be passed to
- initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
+ #initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
is passed (default), it is set to the original imageSize . Setting it to larger value can help you
preserve details in the original image, especially when there is a big radial distortion.
@param balance Sets the new focal length in range between the min focal length and the max focal
diff --git a/modules/calib3d/perf/perf_undistort.cpp b/modules/calib3d/perf/perf_undistort.cpp
index 5372aaea9220..e15d2aefe3a1 100644
--- a/modules/calib3d/perf/perf_undistort.cpp
+++ b/modules/calib3d/perf/perf_undistort.cpp
@@ -16,4 +16,15 @@ PERF_TEST(Undistort, InitUndistortMap)
SANITY_CHECK_NOTHING();
}
+PERF_TEST(Undistort, DISABLED_InitInverseRectificationMap)
+{
+ Size size_w_h(512 + 3, 512);
+ Mat k(3, 3, CV_32FC1);
+ Mat d(1, 14, CV_64FC1);
+ Mat dst(size_w_h, CV_32FC2);
+ declare.in(k, d, WARMUP_RNG).out(dst);
+ TEST_CYCLE() initInverseRectificationMap(k, d, noArray(), k, size_w_h, CV_32FC2, dst, noArray());
+ SANITY_CHECK_NOTHING();
+}
+
} // namespace
diff --git a/modules/calib3d/src/chessboard.cpp b/modules/calib3d/src/chessboard.cpp
index dbc47722cba9..18e2605f53b5 100644
--- a/modules/calib3d/src/chessboard.cpp
+++ b/modules/calib3d/src/chessboard.cpp
@@ -3924,7 +3924,7 @@ bool findChessboardCornersSB(cv::InputArray image_, cv::Size pattern_size,
{
meta_.create(int(board.rowCount()),int(board.colCount()),CV_8UC1);
cv::Mat meta = meta_.getMat();
- meta = 0;
+ meta.setTo(cv::Scalar::all(0));
for(int row =0;row < meta.rows-1;++row)
{
for(int col=0;col< meta.cols-1;++col)
diff --git a/modules/calib3d/src/ptsetreg.cpp b/modules/calib3d/src/ptsetreg.cpp
index 6bd3b16c32f6..5c91fff037cc 100644
--- a/modules/calib3d/src/ptsetreg.cpp
+++ b/modules/calib3d/src/ptsetreg.cpp
@@ -900,6 +900,86 @@ int estimateAffine3D(InputArray _from, InputArray _to,
return createRANSACPointSetRegistrator(makePtr(), 4, ransacThreshold, confidence)->run(dFrom, dTo, _out, _inliers);
}
+Mat estimateAffine3D(InputArray _from, InputArray _to,
+ CV_OUT double* _scale, bool force_rotation)
+{
+ CV_INSTRUMENT_REGION();
+ Mat from = _from.getMat(), to = _to.getMat();
+ int count = from.checkVector(3);
+
+ CV_CheckGE(count, 3, "Umeyama algorithm needs at least 3 points for affine transformation estimation.");
+ CV_CheckEQ(to.checkVector(3), count, "Point sets need to have the same size");
+ from = from.reshape(1, count);
+ to = to.reshape(1, count);
+ if(from.type() != CV_64F)
+ from.convertTo(from, CV_64F);
+ if(to.type() != CV_64F)
+ to.convertTo(to, CV_64F);
+
+ const double one_over_n = 1./count;
+
+ const auto colwise_mean = [one_over_n](const Mat& m)
+ {
+ Mat my;
+ reduce(m, my, 0, REDUCE_SUM, CV_64F);
+ return my * one_over_n;
+ };
+
+ const auto demean = [count](const Mat& A, const Mat& mean)
+ {
+ Mat A_centered = Mat::zeros(count, 3, CV_64F);
+ for(int i = 0; i < count; i++)
+ {
+ A_centered.row(i) = A.row(i) - mean;
+ }
+ return A_centered;
+ };
+
+ Mat from_mean = colwise_mean(from);
+ Mat to_mean = colwise_mean(to);
+
+ Mat from_centered = demean(from, from_mean);
+ Mat to_centered = demean(to, to_mean);
+
+ Mat cov = to_centered.t() * from_centered * one_over_n;
+
+ Mat u,d,vt;
+ SVD::compute(cov, d, u, vt, SVD::MODIFY_A | SVD::FULL_UV);
+
+ CV_CheckGE(countNonZero(d), 2, "Points cannot be colinear");
+
+ Mat S = Mat::eye(3, 3, CV_64F);
+ // det(d) can only ever be >=0, so we can always use this here (compared to the original formula by Umeyama)
+ if (force_rotation && (determinant(u) * determinant(vt) < 0))
+ {
+ S.at(2, 2) = -1;
+ }
+ Mat rmat = u*S*vt;
+
+ double scale = 1.0;
+ if (_scale)
+ {
+ double var_from = 0.;
+ scale = 0.;
+ for(int i = 0; i < 3; i++)
+ {
+ var_from += norm(from_centered.col(i), NORM_L2SQR);
+ scale += d.at(i, 0) * S.at(i, i);
+ }
+ double inverse_var = count / var_from;
+ scale *= inverse_var;
+ *_scale = scale;
+ }
+ Mat new_to = scale * rmat * from_mean.t();
+
+ Mat transform;
+ transform.create(3, 4, CV_64F);
+ Mat r_part(transform(Rect(0, 0, 3, 3)));
+ rmat.copyTo(r_part);
+ transform.col(3) = to_mean.t() - new_to;
+ return transform;
+}
+
int estimateTranslation3D(InputArray _from, InputArray _to,
OutputArray _out, OutputArray _inliers,
double ransacThreshold, double confidence)
diff --git a/modules/calib3d/src/undistort.dispatch.cpp b/modules/calib3d/src/undistort.dispatch.cpp
index 2dd52037a959..146befd955f0 100644
--- a/modules/calib3d/src/undistort.dispatch.cpp
+++ b/modules/calib3d/src/undistort.dispatch.cpp
@@ -164,6 +164,125 @@ void initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoeffs,
fx, fy, k1, k2, p1, p2, k3, k4, k5, k6, s1, s2, s3, s4));
}
+void initInverseRectificationMap( InputArray _cameraMatrix, InputArray _distCoeffs,
+ InputArray _matR, InputArray _newCameraMatrix,
+ const Size& size, int m1type, OutputArray _map1, OutputArray _map2 )
+{
+ // Parameters
+ Mat cameraMatrix = _cameraMatrix.getMat(), distCoeffs = _distCoeffs.getMat();
+ Mat matR = _matR.getMat(), newCameraMatrix = _newCameraMatrix.getMat();
+
+ // Check m1type validity
+ if( m1type <= 0 )
+ m1type = CV_16SC2;
+ CV_Assert( m1type == CV_16SC2 || m1type == CV_32FC1 || m1type == CV_32FC2 );
+
+ // Init Maps
+ _map1.create( size, m1type );
+ Mat map1 = _map1.getMat(), map2;
+ if( m1type != CV_32FC2 )
+ {
+ _map2.create( size, m1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
+ map2 = _map2.getMat();
+ }
+ else {
+ _map2.release();
+ }
+
+ // Init camera intrinsics
+ Mat_ A = Mat_(cameraMatrix), Ar;
+ if( !newCameraMatrix.empty() )
+ Ar = Mat_(newCameraMatrix);
+ else
+ Ar = getDefaultNewCameraMatrix( A, size, true );
+ CV_Assert( A.size() == Size(3,3) );
+ CV_Assert( Ar.size() == Size(3,3) || Ar.size() == Size(4, 3));
+
+ // Init rotation matrix
+ Mat_ R = Mat_::eye(3, 3);
+ if( !matR.empty() )
+ {
+ R = Mat_(matR);
+ //Note, do not inverse
+ }
+ CV_Assert( Size(3,3) == R.size() );
+
+ // Init distortion vector
+ if( !distCoeffs.empty() ){
+ distCoeffs = Mat_(distCoeffs);
+
+ // Fix distortion vector orientation
+ if( distCoeffs.rows != 1 && !distCoeffs.isContinuous() ) {
+ distCoeffs = distCoeffs.t();
+ }
+ }
+
+ // Validate distortion vector size
+ CV_Assert( distCoeffs.empty() || // Empty allows cv::undistortPoints to skip distortion
+ distCoeffs.size() == Size(1, 4) || distCoeffs.size() == Size(4, 1) ||
+ distCoeffs.size() == Size(1, 5) || distCoeffs.size() == Size(5, 1) ||
+ distCoeffs.size() == Size(1, 8) || distCoeffs.size() == Size(8, 1) ||
+ distCoeffs.size() == Size(1, 12) || distCoeffs.size() == Size(12, 1) ||
+ distCoeffs.size() == Size(1, 14) || distCoeffs.size() == Size(14, 1));
+
+ // Create objectPoints
+ std::vector p2i_objPoints;
+ std::vector p2f_objPoints;
+ for (int r = 0; r < size.height; r++)
+ {
+ for (int c = 0; c < size.width; c++)
+ {
+ p2i_objPoints.push_back(cv::Point2i(c, r));
+ p2f_objPoints.push_back(cv::Point2f(static_cast(c), static_cast(r)));
+ }
+ }
+
+ // Undistort
+ std::vector p2f_objPoints_undistorted;
+ undistortPoints(
+ p2f_objPoints,
+ p2f_objPoints_undistorted,
+ A,
+ distCoeffs,
+ cv::Mat::eye(cv::Size(3, 3), CV_64FC1), // R
+ cv::Mat::eye(cv::Size(3, 3), CV_64FC1) // P = New K
+ );
+
+ // Rectify
+ std::vector p2f_sourcePoints_pinHole;
+ perspectiveTransform(
+ p2f_objPoints_undistorted,
+ p2f_sourcePoints_pinHole,
+ R
+ );
+
+ // Project points back to camera coordinates.
+ std::vector p2f_sourcePoints;
+ undistortPoints(
+ p2f_sourcePoints_pinHole,
+ p2f_sourcePoints,
+ cv::Mat::eye(cv::Size(3, 3), CV_32FC1), // K
+ cv::Mat::zeros(cv::Size(1, 4), CV_32FC1), // Distortion
+ cv::Mat::eye(cv::Size(3, 3), CV_32FC1), // R
+ Ar // New K
+ );
+
+ // Copy to map
+ if (m1type == CV_16SC2) {
+ for (size_t i=0; i < p2i_objPoints.size(); i++) {
+ map1.at(p2i_objPoints[i].y, p2i_objPoints[i].x) = Vec2s(saturate_cast(p2f_sourcePoints[i].x), saturate_cast(p2f_sourcePoints[i].y));
+ }
+ } else if (m1type == CV_32FC2) {
+ for (size_t i=0; i < p2i_objPoints.size(); i++) {
+ map1.at(p2i_objPoints[i].y, p2i_objPoints[i].x) = Vec2f(p2f_sourcePoints[i]);
+ }
+ } else { // m1type == CV_32FC1
+ for (size_t i=0; i < p2i_objPoints.size(); i++) {
+ map1.at(p2i_objPoints[i].y, p2i_objPoints[i].x) = p2f_sourcePoints[i].x;
+ map2.at(p2i_objPoints[i].y, p2i_objPoints[i].x) = p2f_sourcePoints[i].y;
+ }
+ }
+}
void undistort( InputArray _src, OutputArray _dst, InputArray _cameraMatrix,
InputArray _distCoeffs, InputArray _newCameraMatrix )
diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp
index 521b01ac08a8..3f1b50e5f262 100644
--- a/modules/calib3d/test/test_affine3d_estimator.cpp
+++ b/modules/calib3d/test/test_affine3d_estimator.cpp
@@ -201,4 +201,25 @@ TEST(Calib3d_EstimateAffine3D, regression_16007)
EXPECT_EQ(1, res);
}
+TEST(Calib3d_EstimateAffine3D, umeyama_3_pt)
+{
+ std::vector points = {{{0.80549149, 0.8225781, 0.79949521},
+ {0.28906756, 0.57158557, 0.9864789},
+ {0.58266182, 0.65474983, 0.25078834}}};
+ cv::Mat R = (cv::Mat_(3,3) << 0.9689135, -0.0232753, 0.2463025,
+ 0.0236362, 0.9997195, 0.0014915,
+ -0.2462682, 0.0043765, 0.9691918);
+ cv::Vec3d t(1., 2., 3.);
+ cv::Affine3d transform(R, t);
+ std::vector transformed_points(points.size());
+ std::transform(points.begin(), points.end(), transformed_points.begin(), [transform](const cv::Vec3d v){return transform * v;});
+ double scale;
+ cv::Mat trafo_est = estimateAffine3D(points, transformed_points, &scale);
+ Mat R_est(trafo_est(Rect(0, 0, 3, 3)));
+ EXPECT_LE(cvtest::norm(R_est, R, NORM_INF), 1e-6);
+ Vec3d t_est = trafo_est.col(3);
+ EXPECT_LE(cvtest::norm(t_est, t, NORM_INF), 1e-6);
+ EXPECT_NEAR(scale, 1.0, 1e-6);
+}
+
}} // namespace
diff --git a/modules/calib3d/test/test_undistort.cpp b/modules/calib3d/test/test_undistort.cpp
index c1ec2063ee04..ea1a95207954 100644
--- a/modules/calib3d/test/test_undistort.cpp
+++ b/modules/calib3d/test/test_undistort.cpp
@@ -719,11 +719,281 @@ double CV_InitUndistortRectifyMapTest::get_success_error_level( int /*test_case_
return 8;
}
+//------------------------------------------------------
+
+class CV_InitInverseRectificationMapTest : public cvtest::ArrayTest
+{
+public:
+ CV_InitInverseRectificationMapTest();
+protected:
+ int prepare_test_case (int test_case_idx);
+ void prepare_to_validation( int test_case_idx );
+ void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types );
+ double get_success_error_level( int test_case_idx, int i, int j );
+ void run_func();
+
+private:
+ static const int MAX_X = 1024;
+ static const int MAX_Y = 1024;
+ bool zero_new_cam;
+ bool zero_distortion;
+ bool zero_R;
+
+ cv::Size img_size;
+ int map_type;
+};
+
+CV_InitInverseRectificationMapTest::CV_InitInverseRectificationMapTest()
+{
+ test_array[INPUT].push_back(NULL); // camera matrix
+ test_array[INPUT].push_back(NULL); // distortion coeffs
+ test_array[INPUT].push_back(NULL); // R matrix
+ test_array[INPUT].push_back(NULL); // new camera matrix
+ test_array[OUTPUT].push_back(NULL); // inverse rectified mapx
+ test_array[OUTPUT].push_back(NULL); // inverse rectified mapy
+ test_array[REF_OUTPUT].push_back(NULL);
+ test_array[REF_OUTPUT].push_back(NULL);
+
+ zero_distortion = zero_new_cam = zero_R = false;
+ map_type = 0;
+}
+
+void CV_InitInverseRectificationMapTest::get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types )
+{
+ cvtest::ArrayTest::get_test_array_types_and_sizes(test_case_idx,sizes,types);
+ RNG& rng = ts->get_rng();
+ //rng.next();
+
+ map_type = CV_32F;
+ types[OUTPUT][0] = types[OUTPUT][1] = types[REF_OUTPUT][0] = types[REF_OUTPUT][1] = map_type;
+
+ img_size.width = cvtest::randInt(rng) % MAX_X + 1;
+ img_size.height = cvtest::randInt(rng) % MAX_Y + 1;
+
+ types[INPUT][0] = cvtest::randInt(rng)%2 ? CV_64F : CV_32F;
+ types[INPUT][1] = cvtest::randInt(rng)%2 ? CV_64F : CV_32F;
+ types[INPUT][2] = cvtest::randInt(rng)%2 ? CV_64F : CV_32F;
+ types[INPUT][3] = cvtest::randInt(rng)%2 ? CV_64F : CV_32F;
+
+ sizes[OUTPUT][0] = sizes[OUTPUT][1] = sizes[REF_OUTPUT][0] = sizes[REF_OUTPUT][1] = img_size;
+ sizes[INPUT][0] = sizes[INPUT][2] = sizes[INPUT][3] = cvSize(3,3);
+
+ Size dsize;
+
+ if (cvtest::randInt(rng)%2)
+ {
+ if (cvtest::randInt(rng)%2)
+ {
+ dsize = Size(1,4);
+ }
+ else
+ {
+ dsize = Size(1,5);
+ }
+ }
+ else
+ {
+ if (cvtest::randInt(rng)%2)
+ {
+ dsize = Size(4,1);
+ }
+ else
+ {
+ dsize = Size(5,1);
+ }
+ }
+ sizes[INPUT][1] = dsize;
+}
+
+
+int CV_InitInverseRectificationMapTest::prepare_test_case(int test_case_idx)
+{
+ RNG& rng = ts->get_rng();
+ int code = cvtest::ArrayTest::prepare_test_case( test_case_idx );
+
+ if (code <= 0)
+ return code;
+
+ int dist_size = test_mat[INPUT][1].cols > test_mat[INPUT][1].rows ? test_mat[INPUT][1].cols : test_mat[INPUT][1].rows;
+ double cam[9] = {0,0,0,0,0,0,0,0,1};
+ vector dist(dist_size);
+ vector new_cam(test_mat[INPUT][3].cols * test_mat[INPUT][3].rows);
+
+ Mat _camera(3,3,CV_64F,cam);
+ Mat _distort(test_mat[INPUT][1].size(),CV_64F,&dist[0]);
+ Mat _new_cam(test_mat[INPUT][3].size(),CV_64F,&new_cam[0]);
+
+ //Generating camera matrix
+ double sz = MAX(img_size.width,img_size.height);
+ double aspect_ratio = cvtest::randReal(rng)*0.6 + 0.7;
+ cam[2] = (img_size.width - 1)*0.5 + cvtest::randReal(rng)*10 - 5;
+ cam[5] = (img_size.height - 1)*0.5 + cvtest::randReal(rng)*10 - 5;
+ cam[0] = sz/(0.9 - cvtest::randReal(rng)*0.6);
+ cam[4] = aspect_ratio*cam[0];
+
+ //Generating distortion coeffs
+ dist[0] = cvtest::randReal(rng)*0.06 - 0.03;
+ dist[1] = cvtest::randReal(rng)*0.06 - 0.03;
+ if( dist[0]*dist[1] > 0 )
+ dist[1] = -dist[1];
+ if( cvtest::randInt(rng)%4 != 0 )
+ {
+ dist[2] = cvtest::randReal(rng)*0.004 - 0.002;
+ dist[3] = cvtest::randReal(rng)*0.004 - 0.002;
+ if (dist_size > 4)
+ dist[4] = cvtest::randReal(rng)*0.004 - 0.002;
+ }
+ else
+ {
+ dist[2] = dist[3] = 0;
+ if (dist_size > 4)
+ dist[4] = 0;
+ }
+
+ //Generating new camera matrix
+ _new_cam = Scalar::all(0);
+ new_cam[8] = 1;
+
+ // If P == K
+ //new_cam[0] = cam[0];
+ //new_cam[4] = cam[4];
+ //new_cam[2] = cam[2];
+ //new_cam[5] = cam[5];
+
+ // If P != K
+ new_cam[0] = cam[0] + (cvtest::randReal(rng) - (double)0.5)*0.2*cam[0]; //10%
+ new_cam[4] = cam[4] + (cvtest::randReal(rng) - (double)0.5)*0.2*cam[4]; //10%
+ new_cam[2] = cam[2] + (cvtest::randReal(rng) - (double)0.5)*0.3*img_size.width; //15%
+ new_cam[5] = cam[5] + (cvtest::randReal(rng) - (double)0.5)*0.3*img_size.height; //15%
+
+ //Generating R matrix
+ Mat _rot(3,3,CV_64F);
+ Mat rotation(1,3,CV_64F);
+ rotation.at(0) = CV_PI/8*(cvtest::randReal(rng) - (double)0.5); // phi
+ rotation.at(1) = CV_PI/8*(cvtest::randReal(rng) - (double)0.5); // ksi
+ rotation.at(2) = CV_PI/3*(cvtest::randReal(rng) - (double)0.5); //khi
+ cvtest::Rodrigues(rotation, _rot);
+
+ //cvSetIdentity(_rot);
+ //copying data
+ cvtest::convert( _camera, test_mat[INPUT][0], test_mat[INPUT][0].type());
+ cvtest::convert( _distort, test_mat[INPUT][1], test_mat[INPUT][1].type());
+ cvtest::convert( _rot, test_mat[INPUT][2], test_mat[INPUT][2].type());
+ cvtest::convert( _new_cam, test_mat[INPUT][3], test_mat[INPUT][3].type());
+
+ zero_distortion = (cvtest::randInt(rng)%2) == 0 ? false : true;
+ zero_new_cam = (cvtest::randInt(rng)%2) == 0 ? false : true;
+ zero_R = (cvtest::randInt(rng)%2) == 0 ? false : true;
+
+ return code;
+}
+
+void CV_InitInverseRectificationMapTest::prepare_to_validation(int/* test_case_idx*/)
+{
+ // Configure Parameters
+ Mat _a0 = test_mat[INPUT][0];
+ Mat _d0 = zero_distortion ? cv::Mat() : test_mat[INPUT][1];
+ Mat _R0 = zero_R ? cv::Mat() : test_mat[INPUT][2];
+ Mat _new_cam0 = zero_new_cam ? test_mat[INPUT][0] : test_mat[INPUT][3];
+ Mat _mapx(img_size, CV_32F), _mapy(img_size, CV_32F);
+
+ double a[9], d[5]={0., 0., 0., 0. , 0.}, R[9]={1., 0., 0., 0., 1., 0., 0., 0., 1.}, a1[9];
+ Mat _a(3, 3, CV_64F, a), _a1(3, 3, CV_64F, a1);
+ Mat _d(_d0.rows,_d0.cols, CV_MAKETYPE(CV_64F,_d0.channels()),d);
+ Mat _R(3, 3, CV_64F, R);
+ double fx, fy, cx, cy, ifx, ify, cxn, cyn;
+
+ // Camera matrix
+ CV_Assert(_a0.size() == Size(3, 3));
+ _a0.convertTo(_a, CV_64F);
+ if( !_new_cam0.empty() )
+ {
+ CV_Assert(_new_cam0.size() == Size(3, 3));
+ _new_cam0.convertTo(_a1, CV_64F);
+ }
+ else
+ {
+ _a.copyTo(_a1);
+ }
+
+ // Distortion
+ CV_Assert(_d0.empty() ||
+ _d0.size() == Size(5, 1) ||
+ _d0.size() == Size(1, 5) ||
+ _d0.size() == Size(4, 1) ||
+ _d0.size() == Size(1, 4));
+ if( !_d0.empty() )
+ _d0.convertTo(_d, CV_64F);
+
+ // Rotation
+ if( !_R0.empty() )
+ {
+ CV_Assert(_R0.size() == Size(3, 3));
+ Mat tmp;
+ _R0.convertTo(_R, CV_64F);
+ }
+
+ // Copy camera matrix
+ fx = a[0]; fy = a[4]; cx = a[2]; cy = a[5];
+
+ // Copy new camera matrix
+ ifx = a1[0]; ify = a1[4]; cxn = a1[2]; cyn = a1[5];
+
+ // Undistort
+ for( int v = 0; v < img_size.height; v++ )
+ {
+ for( int u = 0; u < img_size.width; u++ )
+ {
+ // Convert from image to pin-hole coordinates
+ double x = (u - cx)/fx;
+ double y = (v - cy)/fy;
+
+ // Undistort
+ double x2 = x*x, y2 = y*y;
+ double r2 = x2 + y2;
+ double cdist = 1./(1. + (d[0] + (d[1] + d[4]*r2)*r2)*r2); // (1. + (d[5] + (d[6] + d[7]*r2)*r2)*r2) == 1 as d[5-7]=0;
+ double x_ = (x - (d[2]*2.*x*y + d[3]*(r2 + 2.*x2)))*cdist;
+ double y_ = (y - (d[3]*2.*x*y + d[2]*(r2 + 2.*y2)))*cdist;
+
+ // Rectify
+ double X = R[0]*x_ + R[1]*y_ + R[2];
+ double Y = R[3]*x_ + R[4]*y_ + R[5];
+ double Z = R[6]*x_ + R[7]*y_ + R[8];
+ double x__ = X/Z;
+ double y__ = Y/Z;
+
+ // Convert from pin-hole to image coordinates
+ _mapy.at(v, u) = (float)(y__*ify + cyn);
+ _mapx.at(v, u) = (float)(x__*ifx + cxn);
+ }
+ }
+
+ // Convert
+ _mapx.convertTo(test_mat[REF_OUTPUT][0], test_mat[REF_OUTPUT][0].type());
+ _mapy.convertTo(test_mat[REF_OUTPUT][1], test_mat[REF_OUTPUT][0].type());
+}
+
+void CV_InitInverseRectificationMapTest::run_func()
+{
+ cv::Mat camera_mat = test_mat[INPUT][0];
+ cv::Mat dist = zero_distortion ? cv::Mat() : test_mat[INPUT][1];
+ cv::Mat R = zero_R ? cv::Mat() : test_mat[INPUT][2];
+ cv::Mat new_cam = zero_new_cam ? cv::Mat() : test_mat[INPUT][3];
+ cv::Mat& mapx = test_mat[OUTPUT][0], &mapy = test_mat[OUTPUT][1];
+ cv::initInverseRectificationMap(camera_mat,dist,R,new_cam,img_size,map_type,mapx,mapy);
+}
+
+double CV_InitInverseRectificationMapTest::get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ )
+{
+ return 8;
+}
+
//////////////////////////////////////////////////////////////////////////////////////////////////////
TEST(Calib3d_DefaultNewCameraMatrix, accuracy) { CV_DefaultNewCameraMatrixTest test; test.safe_run(); }
TEST(Calib3d_UndistortPoints, accuracy) { CV_UndistortPointsTest test; test.safe_run(); }
TEST(Calib3d_InitUndistortRectifyMap, accuracy) { CV_InitUndistortRectifyMapTest test; test.safe_run(); }
+TEST(DISABLED_Calib3d_InitInverseRectificationMap, accuracy) { CV_InitInverseRectificationMapTest test; test.safe_run(); }
////////////////////////////// undistort /////////////////////////////////
@@ -1537,4 +1807,78 @@ TEST(Calib3d_initUndistortRectifyMap, regression_14467)
EXPECT_LE(cvtest::norm(dst, mesh_uv, NORM_INF), 1e-3);
}
+TEST(Calib3d_initInverseRectificationMap, regression_20165)
+{
+ Size size_w_h(1280, 800);
+ Mat dst(size_w_h, CV_32FC2); // Reference for validation
+ Mat mapxy; // Output of initInverseRectificationMap()
+
+ // Camera Matrix
+ double k[9]={
+ 1.5393951443032472e+03, 0., 6.7491727003047140e+02,
+ 0., 1.5400748240626747e+03, 5.1226968329123963e+02,
+ 0., 0., 1.
+ };
+ Mat _K(3, 3, CV_64F, k);
+
+ // Distortion
+ // double d[5]={0,0,0,0,0}; // Zero Distortion
+ double d[5]={ // Non-zero distortion
+ -3.4134571357400023e-03, 2.9733267766101856e-03, // K1, K2
+ 3.6653586399031184e-03, -3.1960714017365702e-03, // P1, P2
+ 0. // K3
+ };
+ Mat _d(1, 5, CV_64F, d);
+
+ // Rotation
+ //double R[9]={1., 0., 0., 0., 1., 0., 0., 0., 1.}; // Identity transform (none)
+ double R[9]={ // Random transform
+ 9.6625486010428052e-01, 1.6055789378989216e-02, 2.5708706103628531e-01,
+ -8.0300261706161002e-03, 9.9944797497929860e-01, -3.2237617614807819e-02,
+ -2.5746274294459848e-01, 2.9085338870243265e-02, 9.6585039165403186e-01
+ };
+ Mat _R(3, 3, CV_64F, R);
+
+ // --- Validation --- //
+ initInverseRectificationMap(_K, _d, _R, _K, size_w_h, CV_32FC2, mapxy, noArray());
+
+ // Copy camera matrix
+ double fx, fy, cx, cy, ifx, ify, cxn, cyn;
+ fx = k[0]; fy = k[4]; cx = k[2]; cy = k[5];
+
+ // Copy new camera matrix
+ ifx = k[0]; ify = k[4]; cxn = k[2]; cyn = k[5];
+
+ // Distort Points
+ for( int v = 0; v < size_w_h.height; v++ )
+ {
+ for( int u = 0; u < size_w_h.width; u++ )
+ {
+ // Convert from image to pin-hole coordinates
+ double x = (u - cx)/fx;
+ double y = (v - cy)/fy;
+
+ // Undistort
+ double x2 = x*x, y2 = y*y;
+ double r2 = x2 + y2;
+ double cdist = 1./(1. + (d[0] + (d[1] + d[4]*r2)*r2)*r2); // (1. + (d[5] + (d[6] + d[7]*r2)*r2)*r2) == 1 as d[5-7]=0;
+ double x_ = (x - (d[2]*2.*x*y + d[3]*(r2 + 2.*x2)))*cdist;
+ double y_ = (y - (d[3]*2.*x*y + d[2]*(r2 + 2.*y2)))*cdist;
+
+ // Rectify
+ double X = R[0]*x_ + R[1]*y_ + R[2];
+ double Y = R[3]*x_ + R[4]*y_ + R[5];
+ double Z = R[6]*x_ + R[7]*y_ + R[8];
+ double x__ = X/Z;
+ double y__ = Y/Z;
+
+ // Convert from pin-hole to image coordinates
+ dst.at(v, u) = Vec2f((float)(x__*ifx + cxn), (float)(y__*ify + cyn));
+ }
+ }
+
+ // Check Result
+ EXPECT_LE(cvtest::norm(dst, mapxy, NORM_INF), 2e-1);
+}
+
}} // namespace
diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt
index b2797ab31fc1..6a969e5fc358 100644
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -153,6 +153,10 @@ if(OPENCV_CORE_EXCLUDE_C_API)
ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1")
endif()
+if(OPENCV_DISABLE_THREAD_SUPPORT)
+ ocv_target_compile_definitions(${the_module} PUBLIC "OPENCV_DISABLE_THREAD_SUPPORT=1")
+endif()
+
if(HAVE_HPX)
ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
endif()
diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
index fe15e51e4e85..8365b10ba9fd 100644
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -142,6 +142,11 @@
# define CV_NEON 1
#endif
+#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
+# include
+# define CV_RVV071 1
+#endif
+
#if defined(__ARM_NEON__) || defined(__aarch64__)
# include
#endif
@@ -338,6 +343,10 @@ struct VZeroUpperGuard {
# define CV_NEON 0
#endif
+#ifndef CV_RVV071
+# define CV_RVV071 0
+#endif
+
#ifndef CV_VSX
# define CV_VSX 0
#endif
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index ebb1f65eb788..7f510748af13 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -271,6 +271,8 @@ namespace cv {
#define CV_CPU_MSA 150
+#define CV_CPU_RISCVV 170
+
#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
@@ -325,6 +327,8 @@ enum CpuFeatures {
CPU_MSA = 150,
+ CPU_RISCVV = 170,
+
CPU_VSX = 200,
CPU_VSX3 = 201,
diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index 6f5b8e17885b..ac331f2154de 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -200,7 +200,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
# undef CV_RVV
#endif
-#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
+#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
@@ -214,6 +214,10 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#include "opencv2/core/hal/intrin_neon.hpp"
+#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
+#define CV_SIMD128_CPP 0
+#include "opencv2/core/hal/intrin_rvv071.hpp"
+
#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_vsx.hpp"
diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
index 785648575a60..e17972a3fc4a 100644
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -538,49 +538,81 @@ inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
v_int16x8& c, v_int16x8& d)
{
c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
+#if CV_NEON_AARCH64
+ d.val = vmull_high_s8(a.val, b.val);
+#else // #if CV_NEON_AARCH64
d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
+#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
v_uint16x8& c, v_uint16x8& d)
{
c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
+#if CV_NEON_AARCH64
+ d.val = vmull_high_u8(a.val, b.val);
+#else // #if CV_NEON_AARCH64
d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
+#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
v_int32x4& c, v_int32x4& d)
{
c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+#if CV_NEON_AARCH64
+ d.val = vmull_high_s16(a.val, b.val);
+#else // #if CV_NEON_AARCH64
d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
v_uint32x4& c, v_uint32x4& d)
{
c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
+#if CV_NEON_AARCH64
+ d.val = vmull_high_u16(a.val, b.val);
+#else // #if CV_NEON_AARCH64
d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
v_uint64x2& c, v_uint64x2& d)
{
c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
+#if CV_NEON_AARCH64
+ d.val = vmull_high_u32(a.val, b.val);
+#else // #if CV_NEON_AARCH64
d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
+#endif // #if CV_NEON_AARCH64
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
return v_int16x8(vcombine_s16(
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
- vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16)
+ vshrn_n_s32(
+#if CV_NEON_AARCH64
+ vmull_high_s16(a.val, b.val)
+#else // #if CV_NEON_AARCH64
+ vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val))
+#endif // #if CV_NEON_AARCH64
+ , 16)
));
}
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
return v_uint16x8(vcombine_u16(
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
- vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16)
+ vshrn_n_u32(
+#if CV_NEON_AARCH64
+ vmull_high_u16(a.val, b.val)
+#else // #if CV_NEON_AARCH64
+ vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val))
+#endif // #if CV_NEON_AARCH64
+ , 16)
));
}
@@ -1254,29 +1286,56 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
inline unsigned v_reduce_sum(const v_uint8x16& a)
{
+#if CV_NEON_AARCH64
+ uint16_t t0 = vaddlvq_u8(a.val);
+ return t0;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline int v_reduce_sum(const v_int8x16& a)
{
+#if CV_NEON_AARCH64
+ int16_t t0 = vaddlvq_s8(a.val);
+ return t0;
+#else // #if CV_NEON_AARCH64
int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val));
int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
return vget_lane_s32(vpadd_s32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sum(const v_uint16x8& a)
{
+#if CV_NEON_AARCH64
+ uint32_t t0 = vaddlvq_u16(a.val);
+ return t0;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(a.val);
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline int v_reduce_sum(const v_int16x8& a)
{
+#if CV_NEON_AARCH64
+ int32_t t0 = vaddlvq_s16(a.val);
+ return t0;
+#else // #if CV_NEON_AARCH64
int32x4_t t0 = vpaddlq_s16(a.val);
int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
return vget_lane_s32(vpadd_s32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
+#if CV_NEON_AARCH64
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+ return v##vectorfunc##vq_##suffix(a.val); \
+}
+#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
@@ -1285,12 +1344,20 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
+#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)
+#if CV_NEON_AARCH64
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+ return v##vectorfunc##vq_##suffix(a.val); \
+}
+#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
@@ -1298,18 +1365,27 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
+#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
+#if CV_NEON_AARCH64
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+ return v##vectorfunc##vq_##suffix(a.val); \
+}
+#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
}
+#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
@@ -1322,9 +1398,21 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
inline uint64 v_reduce_sum(const v_uint64x2& a)
-{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); }
+{
+#if CV_NEON_AARCH64
+ return vaddvq_u64(a.val);
+#else // #if CV_NEON_AARCH64
+ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0);
+#endif // #if CV_NEON_AARCH64
+}
inline int64 v_reduce_sum(const v_int64x2& a)
-{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); }
+{
+#if CV_NEON_AARCH64
+ return vaddvq_s64(a.val);
+#else // #if CV_NEON_AARCH64
+ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0);
+#endif // #if CV_NEON_AARCH64
+}
#if CV_SIMD128_64F
inline double v_reduce_sum(const v_float64x2& a)
{
@@ -1335,6 +1423,11 @@ inline double v_reduce_sum(const v_float64x2& a)
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{
+#if CV_NEON_AARCH64
+ float32x4_t ab = vpaddq_f32(a.val, b.val); // a0+a1 a2+a3 b0+b1 b2+b3
+ float32x4_t cd = vpaddq_f32(c.val, d.val); // c0+c1 d0+d1 c2+c3 d2+d3
+ return v_float32x4(vpaddq_f32(ab, cd)); // sumA sumB sumC sumD
+#else // #if CV_NEON_AARCH64
float32x4x2_t ab = vtrnq_f32(a.val, b.val);
float32x4x2_t cd = vtrnq_f32(c.val, d.val);
@@ -1345,49 +1438,91 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1));
return v_float32x4(vaddq_f32(v0, v1));
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
+#if CV_NEON_AARCH64
+ uint8x16_t t0 = vabdq_u8(a.val, b.val);
+ uint16_t t1 = vaddlvq_u8(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val)));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
+#if CV_NEON_AARCH64
+ uint8x16_t t0 = vreinterpretq_u8_s8(vabdq_s8(a.val, b.val));
+ uint16_t t1 = vaddlvq_u8(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val))));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
+#if CV_NEON_AARCH64
+ uint16x8_t t0 = vabdq_u16(a.val, b.val);
+ uint32_t t1 = vaddlvq_u16(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
{
+#if CV_NEON_AARCH64
+ uint16x8_t t0 = vreinterpretq_u16_s16(vabdq_s16(a.val, b.val));
+ uint32_t t1 = vaddlvq_u16(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val)));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
{
+#if CV_NEON_AARCH64
+ uint32x4_t t0 = vabdq_u32(a.val, b.val);
+ uint32_t t1 = vaddvq_u32(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vabdq_u32(a.val, b.val);
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
{
+#if CV_NEON_AARCH64
+ uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
+ uint32_t t1 = vaddvq_u32(t0);
+ return t1;
+#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
{
+#if CV_NEON_AARCH64
+ float32x4_t t0 = vabdq_f32(a.val, b.val);
+ return vaddvq_f32(t0);
+#else // #if CV_NEON_AARCH64
float32x4_t t0 = vabdq_f32(a.val, b.val);
float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0));
return vget_lane_f32(vpadd_f32(t1, t1), 0);
+#endif // #if CV_NEON_AARCH64
}
inline v_uint8x16 v_popcount(const v_uint8x16& a)
@@ -1409,30 +1544,54 @@ inline v_uint64x2 v_popcount(const v_int64x2& a)
inline int v_signmask(const v_uint8x16& a)
{
+#if CV_NEON_AARCH64
+ const int8x16_t signPosition = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7};
+ const uint8x16_t byteOrder = {0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15};
+ uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), signPosition);
+ uint8x16_t v1 = vqtbl1q_u8(v0, byteOrder);
+ uint32_t t0 = vaddlvq_u16(vreinterpretq_u16_u8(v1));
+ return t0;
+#else // #if CV_NEON_AARCH64
int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
+#endif // #if CV_NEON_AARCH64
}
+
inline int v_signmask(const v_int8x16& a)
{ return v_signmask(v_reinterpret_as_u8(a)); }
inline int v_signmask(const v_uint16x8& a)
{
+#if CV_NEON_AARCH64
+ const int16x8_t signPosition = {0,1,2,3,4,5,6,7};
+ uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), signPosition);
+ uint32_t t0 = vaddlvq_u16(v0);
+ return t0;
+#else // #if CV_NEON_AARCH64
int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
+#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int16x8& a)
{ return v_signmask(v_reinterpret_as_u16(a)); }
inline int v_signmask(const v_uint32x4& a)
{
+#if CV_NEON_AARCH64
+ const int32x4_t signPosition = {0,1,2,3};
+ uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), signPosition);
+ uint32_t t0 = vaddvq_u32(v0);
+ return t0;
+#else // #if CV_NEON_AARCH64
int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
uint64x2_t v1 = vpaddlq_u32(v0);
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
+#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
@@ -1440,9 +1599,16 @@ inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_uint64x2& a)
{
+#if CV_NEON_AARCH64
+ const int64x2_t signPosition = {0,1};
+ uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), signPosition);
+ uint64_t t0 = vaddvq_u64(v0);
+ return t0;
+#else // #if CV_NEON_AARCH64
int64x1_t m0 = vdup_n_s64(0);
uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
+#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
@@ -1464,19 +1630,31 @@ inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signma
inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
#endif
-#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
-inline bool v_check_all(const v_##_Tpvec& a) \
-{ \
- _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
- uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
- return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
-} \
-inline bool v_check_any(const v_##_Tpvec& a) \
-{ \
- _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
- uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
- return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
-}
+#if CV_NEON_AARCH64
+ #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
+ inline bool v_check_all(const v_##_Tpvec& a) \
+ { \
+ return (vminvq_##suffix(a.val) >> shift) != 0; \
+ } \
+ inline bool v_check_any(const v_##_Tpvec& a) \
+ { \
+ return (vmaxvq_##suffix(a.val) >> shift) != 0; \
+ }
+#else // #if CV_NEON_AARCH64
+ #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
+ inline bool v_check_all(const v_##_Tpvec& a) \
+ { \
+ _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
+ uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+ return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
+ } \
+ inline bool v_check_any(const v_##_Tpvec& a) \
+ { \
+ _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
+ uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+ return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
+ }
+#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
@@ -1829,6 +2007,37 @@ inline v_int32x4 v_trunc(const v_float64x2& a)
}
#endif
+#if CV_NEON_AARCH64
+#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
+inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
+ const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
+ v_##_Tpvec& b0, v_##_Tpvec& b1, \
+ v_##_Tpvec& b2, v_##_Tpvec& b3) \
+{ \
+ /* -- Pass 1: 64b transpose */ \
+ _Tpvec##_t t0 = vreinterpretq_##suffix##32_##suffix##64( \
+ vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \
+ vreinterpretq_##suffix##64_##suffix##32(a2.val))); \
+ _Tpvec##_t t1 = vreinterpretq_##suffix##32_##suffix##64( \
+ vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \
+ vreinterpretq_##suffix##64_##suffix##32(a3.val))); \
+ _Tpvec##_t t2 = vreinterpretq_##suffix##32_##suffix##64( \
+ vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \
+ vreinterpretq_##suffix##64_##suffix##32(a2.val))); \
+ _Tpvec##_t t3 = vreinterpretq_##suffix##32_##suffix##64( \
+ vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \
+ vreinterpretq_##suffix##64_##suffix##32(a3.val))); \
+ /* -- Pass 2: 32b transpose */ \
+ b0.val = vtrn1q_##suffix##32(t0, t1); \
+ b1.val = vtrn2q_##suffix##32(t0, t1); \
+ b2.val = vtrn1q_##suffix##32(t2, t3); \
+ b3.val = vtrn2q_##suffix##32(t2, t3); \
+}
+
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f)
+#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
@@ -1854,6 +2063,7 @@ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
+#endif // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
new file mode 100644
index 000000000000..2bdc622ffd49
--- /dev/null
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
@@ -0,0 +1,2545 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// Copyright (C) 2015, PingTouGe Semiconductor Co., Ltd., all rights reserved.
+
+#ifndef OPENCV_HAL_INTRIN_RISCVV_HPP
+#define OPENCV_HAL_INTRIN_RISCVV_HPP
+
+#include <float.h>
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+//////////// Types ////////////
+struct v_uint8x16
+{
+ typedef uchar lane_type;
+ enum { nlanes = 16 };
+
+ v_uint8x16() {}
+ explicit v_uint8x16(vuint8m1_t v) : val(v) {}
+ v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+ uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+ {
+ uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+ val = (vuint8m1_t)vle_v_u8m1((unsigned char*)v, 16);
+ }
+ uchar get0() const
+ {
+ return vmv_x_s_u8m1_u8(val, 16);
+ }
+
+ vuint8m1_t val;
+};
+
+struct v_int8x16
+{
+ typedef schar lane_type;
+ enum { nlanes = 16 };
+
+ v_int8x16() {}
+ explicit v_int8x16(vint8m1_t v) : val(v) {}
+ v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+ schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+ {
+ schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+ val = (vint8m1_t)vle_v_i8m1((schar*)v, 16);
+ }
+ schar get0() const
+ {
+ return vmv_x_s_i8m1_i8(val, 16);
+ }
+
+ vint8m1_t val;
+};
+
+struct v_uint16x8
+{
+ typedef ushort lane_type;
+ enum { nlanes = 8 };
+
+ v_uint16x8() {}
+ explicit v_uint16x8(vuint16m1_t v) : val(v) {}
+ v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+ {
+ ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+ val = (vuint16m1_t)vle_v_u16m1((unsigned short*)v, 8);
+ }
+ ushort get0() const
+ {
+ return vmv_x_s_u16m1_u16(val, 8);
+ }
+
+ vuint16m1_t val;
+};
+
+struct v_int16x8
+{
+ typedef short lane_type;
+ enum { nlanes = 8 };
+
+ v_int16x8() {}
+ explicit v_int16x8(vint16m1_t v) : val(v) {}
+ v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+ {
+ short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+ val = (vint16m1_t)vle_v_i16m1((signed short*)v, 8);
+ }
+ short get0() const
+ {
+ return vmv_x_s_i16m1_i16(val, 8);
+ }
+
+ vint16m1_t val;
+};
+
+struct v_uint32x4
+{
+ typedef unsigned lane_type;
+ enum { nlanes = 4 };
+
+ v_uint32x4() {}
+ explicit v_uint32x4(vuint32m1_t v) : val(v) {}
+ v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+ {
+ unsigned v[] = {v0, v1, v2, v3};
+ val = (vuint32m1_t)vle_v_u32m1((unsigned int*)v, 4);
+ }
+ unsigned get0() const
+ {
+ return vmv_x_s_u32m1_u32(val, 4);
+ }
+
+ vuint32m1_t val;
+};
+
+struct v_int32x4
+{
+ typedef int lane_type;
+ enum { nlanes = 4 };
+
+ v_int32x4() {}
+ explicit v_int32x4(vint32m1_t v) : val(v) {}
+ v_int32x4(int v0, int v1, int v2, int v3)
+ {
+ int v[] = {v0, v1, v2, v3};
+ val = (vint32m1_t)vle_v_i32m1((signed int*)v, 4);
+ }
+ int get0() const
+ {
+ return vmv_x_s_i32m1_i32(val, 4);
+ }
+ vint32m1_t val;
+};
+
+struct v_float32x4
+{
+ typedef float lane_type;
+ enum { nlanes = 4 };
+
+ v_float32x4() {}
+ explicit v_float32x4(vfloat32m1_t v) : val(v) {}
+ v_float32x4(float v0, float v1, float v2, float v3)
+ {
+ float v[] = {v0, v1, v2, v3};
+ val = (vfloat32m1_t)vle_v_f32m1((float*)v, 4);
+ }
+ float get0() const
+ {
+ return vfmv_f_s_f32m1_f32(val, 4);
+ }
+ vfloat32m1_t val;
+};
+
+struct v_uint64x2
+{
+ typedef uint64 lane_type;
+ enum { nlanes = 2 };
+
+ v_uint64x2() {}
+ explicit v_uint64x2(vuint64m1_t v) : val(v) {}
+ v_uint64x2(uint64 v0, uint64 v1)
+ {
+ uint64 v[] = {v0, v1};
+ val = (vuint64m1_t)vle_v_u64m1((unsigned long*)v, 2);
+ }
+ uint64 get0() const
+ {
+ return vmv_x_s_u64m1_u64(val, 2);
+ }
+ vuint64m1_t val;
+};
+
+struct v_int64x2
+{
+ typedef int64 lane_type;
+ enum { nlanes = 2 };
+
+ v_int64x2() {}
+ explicit v_int64x2(vint64m1_t v) : val(v) {}
+ v_int64x2(int64 v0, int64 v1)
+ {
+ int64 v[] = {v0, v1};
+ val = (vint64m1_t)vle_v_i64m1((long*)v, 2);
+ }
+ int64 get0() const
+ {
+ return vmv_x_s_i64m1_i64(val, 2);
+ }
+ vint64m1_t val;
+};
+
+struct v_float64x2
+{
+ typedef double lane_type;
+ enum { nlanes = 2 };
+
+ v_float64x2() {}
+ explicit v_float64x2(vfloat64m1_t v) : val(v) {}
+ v_float64x2(double v0, double v1)
+ {
+ double v[] = {v0, v1};
+ val = (vfloat64m1_t)vle_v_f64m1((double*)v, 2);
+ }
+ double get0() const
+ {
+ return vfmv_f_s_f64m1_f64(val, 2);
+ }
+ vfloat64m1_t val;
+};
+
+#define OPENCV_HAL_IMPL_RISCVV_INIT(_Tpv, _Tp, suffix) \
+inline _Tp##m1_t vreinterpretq_##suffix##_##suffix(_Tp##m1_t v) { return v; } \
+inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16((vuint8m1_t)(v.val)); } \
+inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16((vint8m1_t)(v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8((vuint16m1_t)(v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8((vint16m1_t)(v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4((vuint32m1_t)(v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4((vint32m1_t)(v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2((vuint64m1_t)(v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2((vint64m1_t)(v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4((vfloat32m1_t)(v.val)); }\
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2((vfloat64m1_t)(v.val)); }
+
+
+OPENCV_HAL_IMPL_RISCVV_INIT(uint8x16, vuint8, u8)
+OPENCV_HAL_IMPL_RISCVV_INIT(int8x16, vint8, s8)
+OPENCV_HAL_IMPL_RISCVV_INIT(uint16x8, vuint16, u16)
+OPENCV_HAL_IMPL_RISCVV_INIT(int16x8, vint16, s16)
+OPENCV_HAL_IMPL_RISCVV_INIT(uint32x4, vuint32, u32)
+OPENCV_HAL_IMPL_RISCVV_INIT(int32x4, vint32, s32)
+OPENCV_HAL_IMPL_RISCVV_INIT(uint64x2, vuint64, u64)
+OPENCV_HAL_IMPL_RISCVV_INIT(int64x2, vint64, s64)
+OPENCV_HAL_IMPL_RISCVV_INIT(float64x2, vfloat64, f64)
+OPENCV_HAL_IMPL_RISCVV_INIT(float32x4, vfloat32, f32)
+#define OPENCV_HAL_IMPL_RISCVV_INIT_SET(__Tp, _Tp, suffix, len, num) \
+inline v_##_Tp##x##num v_setzero_##suffix() { return v_##_Tp##x##num((v##_Tp##m1_t){0}); } \
+inline v_##_Tp##x##num v_setall_##suffix(__Tp v) { return v_##_Tp##x##num(vmv_v_x_##len##m1(v, num)); }
+
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(uchar, uint8, u8, u8, 16)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(char, int8, s8, i8, 16)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(ushort, uint16, u16, u16, 8)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(short, int16, s16, i16, 8)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(unsigned int, uint32, u32, u32, 4)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(int, int32, s32, i32, 4)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(unsigned long, uint64, u64, u64, 2)
+OPENCV_HAL_IMPL_RISCVV_INIT_SET(long, int64, s64, i64, 2)
+inline v_float32x4 v_setzero_f32() { return v_float32x4((vfloat32m1_t){0}); }
+inline v_float32x4 v_setall_f32(float v) { return v_float32x4(vfmv_v_f_f32m1(v, 4)); }
+
+inline v_float64x2 v_setzero_f64() { return v_float64x2(vfmv_v_f_f64m1(0, 2)); }
+inline v_float64x2 v_setall_f64(double v) { return v_float64x2(vfmv_v_f_f64m1(v, 2)); }
+
+
+#define OPENCV_HAL_IMPL_RISCVV_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+ return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+ a.val = intrin(a.val, b.val); \
+ return a; \
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_BIN_OPN(bin_op, _Tpvec, intrin, num) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+ return _Tpvec(intrin(a.val, b.val, num)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+ a.val = intrin(a.val, b.val, num); \
+ return a; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint8x16, vsaddu_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint8x16, vssubu_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int8x16, vsadd_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int8x16, vssub_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint16x8, vsaddu_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint16x8, vssubu_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int16x8, vsadd_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int16x8, vssub_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int32x4, vsadd_vv_i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int32x4, vssub_vv_i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_int32x4, vmul_vv_i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint32x4, vadd_vv_u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint32x4, vsub_vv_u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_uint32x4, vmul_vv_u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int64x2, vsadd_vv_i64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int64x2, vssub_vv_i64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint64x2, vadd_vv_u64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint64x2, vsub_vv_u64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_float32x4, vfadd_vv_f32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_float32x4, vfsub_vv_f32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_float32x4, vfmul_vv_f32m1, 4)
+inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
+{
+ return v_float32x4(vfdiv_vv_f32m1(a.val, b.val, 4));
+}
+inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
+{
+ a.val = vfdiv_vv_f32m1(a.val, b.val, 4);
+ return a;
+}
+
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_float64x2, vfadd_vv_f64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_float64x2, vfsub_vv_f64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_float64x2, vfmul_vv_f64m1, 2)
+inline v_float64x2 operator / (const v_float64x2& a, const v_float64x2& b)
+{
+ return v_float64x2(vfdiv_vv_f64m1(a.val, b.val, 2));
+}
+inline v_float64x2& operator /= (v_float64x2& a, const v_float64x2& b)
+{
+ a.val = vfdiv_vv_f64m1(a.val, b.val, 2);
+ return a;
+}
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_RISCVV_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+ return _Tpvec(intrin(a.val, b.val)); \
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(_Tpvec, func, intrin, num) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+ return _Tpvec(intrin(a.val, b.val, num)); \
+}
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 2)
+
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+ return v_float32x4(vfsqrt_v_f32m1(x.val, 4));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+ return v_float32x4(vfrdiv_vf_f32m1(vfsqrt_v_f32m1(x.val, 4), 1, 4));
+}
+
+inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+ v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a.val, a.val, 4), b.val, b.val, 4));
+ return v_sqrt(x);
+}
+
+inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+ return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a.val, a.val, 4), b.val, b.val, 4));
+}
+
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+ return v_float32x4(vfmacc_vv_f32m1(c.val, a.val, b.val, 4));
+}
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+ return v_int32x4(vmacc_vv_i32m1(c.val, a.val, b.val, 4));
+}
+
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+ return v_fma(a, b, c);
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+ return v_fma(a, b, c);
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+ const v_float32x4& m1, const v_float32x4& m2,
+ const v_float32x4& m3)
+{
+ vfloat32m1_t res = vfmul_vf_f32m1(m0.val, v.val[0], 4);//vmuli_f32(m0.val, v.val, 0);
+ res = vfmacc_vf_f32m1(res, v.val[1], m1.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ res = vfmacc_vf_f32m1(res, v.val[2], m2.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ res = vfmacc_vf_f32m1(res, v.val[3], m3.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ return v_float32x4(res);
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+ const v_float32x4& m1, const v_float32x4& m2,
+ const v_float32x4& a)
+{
+ vfloat32m1_t res = vfmul_vf_f32m1(m0.val, v.val[0], 4);//vmuli_f32(m0.val, v.val, 0);
+ res = vfmacc_vf_f32m1(res, v.val[1], m1.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ res = vfmacc_vf_f32m1(res, v.val[2], m2.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ res = vfadd_vv_f32m1(res, a.val, 4);//vmulai_f32(res, m1.val, v.val, 1);
+ return v_float32x4(res);
+}
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{
+ return v_float64x2(vfsqrt_v_f64m1(x.val, 2));
+}
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+ return v_float64x2(vfrdiv_vf_f64m1(vfsqrt_v_f64m1(x.val, 2), 1, 2));
+}
+
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+ v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a.val, a.val, 2), b.val, b.val, 2));
+ return v_sqrt(x);
+}
+
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+ return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a.val, a.val, 2), b.val, b.val, 2));
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+ return v_float64x2(vfmacc_vv_f64m1(c.val, a.val, b.val, 2));
+}
+
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+ return v_fma(a, b, c);
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(_Tpvec, suffix, num) \
+ OPENCV_HAL_IMPL_RISCVV_BIN_OPN(&, _Tpvec, vand_vv_##suffix, num) \
+ OPENCV_HAL_IMPL_RISCVV_BIN_OPN(|, _Tpvec, vor_vv_##suffix, num) \
+ OPENCV_HAL_IMPL_RISCVV_BIN_OPN(^, _Tpvec, vxor_vv_##suffix, num) \
+ inline _Tpvec operator ~ (const _Tpvec & a) \
+ { \
+ return _Tpvec(vnot_v_##suffix(a.val, num)); \
+ }
+
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint8x16, u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint16x8, u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint32x4, u32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint64x2, u64m1, 2)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int8x16, i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int16x8, i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int32x4, i32m1, 4)
+OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int64x2, i64m1, 2)
+
+#define OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(bin_op, intrin) \
+inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
+{ \
+ return v_float32x4(vfloat32m1_t(intrin(vint32m1_t(a.val), vint32m1_t(b.val), 4))); \
+} \
+inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
+{ \
+ a.val = vfloat32m1_t(intrin(vint32m1_t(a.val), vint32m1_t(b.val), 4)); \
+ return a; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(&, vand_vv_i32m1)
+OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(|, vor_vv_i32m1)
+OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(^, vxor_vv_i32m1)
+
+inline v_float32x4 operator ~ (const v_float32x4& a)
+{
+ return v_float32x4((vfloat32m1_t)(vnot_v_i32m1((vint32m1_t)(a.val), 4)));
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(bin_op, intrin) \
+inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
+{ \
+ return v_float64x2(vfloat64m1_t(intrin(vint64m1_t(a.val), vint64m1_t(b.val), 2))); \
+} \
+inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
+{ \
+ a.val = vfloat64m1_t(intrin(vint64m1_t(a.val), vint64m1_t(b.val), 2)); \
+ return a; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(&, vand_vv_i64m1)
+OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(|, vor_vv_i64m1)
+OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(^, vxor_vv_i64m1)
+
+inline v_float64x2 operator ~ (const v_float64x2& a)
+{
+ return v_float64x2((vfloat64m1_t)(vnot_v_i64m1((vint64m1_t)(a.val), 2)));
+}
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
+{
+ return v_int16x8(vmulh_vv_i16m1(a.val, b.val, 8));
+}
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
+{
+ return v_uint16x8(vmulhu_vv_u16m1(a.val, b.val, 8));
+}
+
+//#define OPENCV_HAL_IMPL_RISCVV_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+//inline _Tpuvec v_abs(const _Tpsvec& a) { \
+// E##xm1_t mask=vmflt_vf_e32xm1_f32m1(x.val, 0.0, 4);
+
+//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint8x16, v_int8x16, u8, s8)
+//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint16x8, v_int16x8, u16, s16)
+//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint32x4, v_int32x4, u32, s32)
+
+// Integer absolute value: build a mask of negative lanes, then use a masked
+// reverse-subtract (0 - x) so only negative lanes are negated; the result is
+// reinterpreted as the unsigned type.
+inline v_uint32x4 v_abs(v_int32x4 x)
+{
+    vbool32_t mask=vmslt_vx_i32m1_b32(x.val, 0, 4);
+    return v_uint32x4((vuint32m1_t)vrsub_vx_i32m1_m(mask, x.val, x.val, 0, 4));
+}
+
+inline v_uint16x8 v_abs(v_int16x8 x)
+{
+    vbool16_t mask=vmslt_vx_i16m1_b16(x.val, 0, 8);
+    return v_uint16x8((vuint16m1_t)vrsub_vx_i16m1_m(mask, x.val, x.val, 0, 8));
+}
+
+inline v_uint8x16 v_abs(v_int8x16 x)
+{
+    vbool8_t mask=vmslt_vx_i8m1_b8(x.val, 0, 16);
+    return v_uint8x16((vuint8m1_t)vrsub_vx_i8m1_m(mask, x.val, x.val, 0, 16));
+}
+
+// Float absolute value via sign-injection-XOR of a value with itself
+// (vfsgnjx(x, x) == |x|), which also clears the sign of -0.0.
+inline v_float32x4 v_abs(v_float32x4 x)
+{
+    return (v_float32x4)vfsgnjx_vv_f32m1(x.val, x.val, 4);
+}
+
+inline v_float64x2 v_abs(v_float64x2 x)
+{
+    return (v_float64x2)vfsgnjx_vv_f64m1(x.val, x.val, 2);
+}
+
+// |a - b| for floats: subtract, then take the absolute value of the result
+// with sign-injection-XOR (see v_abs above).
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+{
+    vfloat32m1_t ret = vfsub_vv_f32m1(a.val, b.val, 4);
+    return (v_float32x4)vfsgnjx_vv_f32m1(ret, ret, 4);
+}
+
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{
+    vfloat64m1_t ret = vfsub_vv_f64m1(a.val, b.val, 2);
+    return (v_float64x2)vfsgnjx_vv_f64m1(ret, ret, 2);
+}
+
+// Unsigned |a - b| computed overflow-free as max(a,b) - min(a,b).
+#define OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(bit, num) \
+inline v_uint##bit##x##num v_absdiff(v_uint##bit##x##num a, v_uint##bit##x##num b){ \
+    vuint##bit##m1_t vmax = vmaxu_vv_u##bit##m1(a.val, b.val, num); \
+    vuint##bit##m1_t vmin = vminu_vv_u##bit##m1(a.val, b.val, num); \
+    return v_uint##bit##x##num(vsub_vv_u##bit##m1(vmax, vmin, num));\
+}
+
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(8, 16)
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(16, 8)
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(32, 4)
+
+/** Saturating absolute difference **/
+// Signed saturating |a - b|: max - min cannot be negative, and the saturating
+// subtract clamps the INT_MIN edge case instead of wrapping.
+inline v_int8x16 v_absdiffs(v_int8x16 a, v_int8x16 b){
+    vint8m1_t vmax = vmax_vv_i8m1(a.val, b.val, 16);
+    vint8m1_t vmin = vmin_vv_i8m1(a.val, b.val, 16);
+    return v_int8x16(vssub_vv_i8m1(vmax, vmin, 16));
+}
+inline v_int16x8 v_absdiffs(v_int16x8 a, v_int16x8 b){
+    vint16m1_t vmax = vmax_vv_i16m1(a.val, b.val, 8);
+    vint16m1_t vmin = vmin_vv_i16m1(a.val, b.val, 8);
+    return v_int16x8(vssub_vv_i16m1(vmax, vmin, 8));
+}
+
+// Signed inputs, unsigned |a - b| result: the max-min difference is
+// non-negative modulo 2^bit, so a plain subtract reinterpreted as unsigned
+// gives the exact absolute difference.
+#define OPENCV_HAL_IMPL_RISCVV_ABSDIFF(_Tpvec, _Tpv, num) \
+inline v_uint##_Tpvec v_absdiff(v_int##_Tpvec a, v_int##_Tpvec b){ \
+    vint##_Tpv##_t max = vmax_vv_i##_Tpv(a.val, b.val, num);\
+    vint##_Tpv##_t min = vmin_vv_i##_Tpv(a.val, b.val, num);\
+    return v_uint##_Tpvec((vuint##_Tpv##_t)vsub_vv_i##_Tpv(max, min, num)); \
+}
+
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF(8x16, 8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF(16x8, 16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_ABSDIFF(32x4, 32m1, 4)
+
+// Multiply and expand
+// Widening multiply: an m1 x m1 product fills an m2 register group; the low
+// half (vget index 0) becomes c, the high half (index 1) becomes d.
+inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
+                         v_int16x8& c, v_int16x8& d)
+{
+    vint16m2_t res = vundefined_i16m2();
+    res = vwmul_vv_i16m2(a.val, b.val, 16);
+    c.val = vget_i16m2_i16m1(res, 0);
+    d.val = vget_i16m2_i16m1(res, 1);
+}
+
+inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
+                         v_uint16x8& c, v_uint16x8& d)
+{
+    vuint16m2_t res = vundefined_u16m2();
+    res = vwmulu_vv_u16m2(a.val, b.val, 16);
+    c.val = vget_u16m2_u16m1(res, 0);
+    d.val = vget_u16m2_u16m1(res, 1);
+}
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    vint32m2_t res = vundefined_i32m2();
+    res = vwmul_vv_i32m2(a.val, b.val, 8);
+    c.val = vget_i32m2_i32m1(res, 0);
+    d.val = vget_i32m2_i32m1(res, 1);
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    vuint32m2_t res = vundefined_u32m2();
+    res = vwmulu_vv_u32m2(a.val, b.val, 8);
+    c.val = vget_u32m2_u32m1(res, 0);
+    d.val = vget_u32m2_u32m1(res, 1);
+}
+
+inline void v_mul_expand(const v_int32x4& a, const v_int32x4& b,
+                         v_int64x2& c, v_int64x2& d)
+{
+    vint64m2_t res = vundefined_i64m2();
+    res = vwmul_vv_i64m2(a.val, b.val, 4);
+    c.val = vget_i64m2_i64m1(res, 0);
+    d.val = vget_i64m2_i64m1(res, 1);
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    vuint64m2_t res = vundefined_u64m2();
+    res = vwmulu_vv_u64m2(a.val, b.val, 4);
+    c.val = vget_u64m2_u64m1(res, 0);
+    d.val = vget_u64m2_u64m1(res, 1);
+}
+
+// Wrapping (modulo 2^N) add/sub/mul: plain integer intrinsics wrap naturally,
+// so the generic binary-function macro is reused for every 8/16-bit type.
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 16)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 8)
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 8)
+//////// Dot Product ////////
+// 16 >> 32
+// Widen-multiply, then vrgather the m2 product so that even-index products
+// land in the low m1 half and odd-index products in the high half; adding the
+// halves yields the pairwise sums a0*b0+a1*b1, a2*b2+a3*b3, ...
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    vint32m2_t res = vundefined_i32m2();
+    res = vwmul_vv_i32m2(a.val, b.val, 8);
+    res = vrgather_vv_i32m2(res, (vuint32m2_t){0, 2, 4, 6, 1, 3, 5, 7}, 8);
+    return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(res, 0), vget_i32m2_i32m1(res, 1), 4));
+}
+// Accumulating variant: pairwise sums plus c.
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    vint32m2_t res = vundefined_i32m2();
+    res = vwmul_vv_i32m2(a.val, b.val, 8);
+    res = vrgather_vv_i32m2(res, (vuint32m2_t){0, 2, 4, 6, 1, 3, 5, 7}, 8);
+    return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(res, 0),vget_i32m2_i32m1(res, 1), 4), c.val, 4));
+}
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+    vint64m2_t res = vundefined_i64m2();
+    res = vwmul_vv_i64m2(a.val, b.val, 4);
+    res = vrgather_vv_i64m2(res, (vuint64m2_t){0, 2, 1, 3}, 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(res, 0), vget_i64m2_i64m1(res, 1), 2));
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    vint64m2_t res = vundefined_i64m2();
+    res = vwmul_vv_i64m2(a.val, b.val, 4);
+    res = vrgather_vv_i64m2(res, (vuint64m2_t){0, 2, 1, 3}, 4);
+    return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(res, 0), vget_i64m2_i64m1(res, 1), 2), c.val, 2));
+}
+
+// 8 >> 32
+// Double-widening dot product: 8-bit lanes are multiplied into 16 bits, a
+// vrgather groups products of the same quad together, and a widening add of
+// the two halves plus a final add collapses each group of four products into
+// one 32-bit lane.
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v1 = vrgather_vv_u16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4));
+}
+
+// Accumulating variant: quad sums plus c.
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b,
+                                   const v_uint32x4& c)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v1 = vrgather_vv_u16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4), c.val, 4));
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v1 = vrgather_vv_i16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4));
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b,
+                                  const v_int32x4& c)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v1 = vrgather_vv_i16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4), c.val, 4));
+}
+
+// 16 >> 64: same scheme, grouping pairs-of-pairs of 32-bit products.
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v1 = vrgather_vv_u32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2));
+}
+
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b,
+                                   const v_uint64x2& c)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v1 = vrgather_vv_u32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2), c.val, 2));
+}
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v1 = vrgather_vv_i32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8);
+    v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2));
+}
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b,
+                                  const v_int64x2& c)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v1 = vrgather_vv_i32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8);
+    v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4);
+    return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2), c.val, 2));
+}
+
+//////// Fast Dot Product ////////
+// 16 >> 32
+// "Fast" dot product: lane pairing order is unspecified, so the vrgather
+// shuffle of v_dotprod is skipped and the two product halves are added
+// directly (low half + high half).
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4));
+}
+
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4), c.val, 4));
+}
+
+// 32 >> 64
+// Fast dot product of four 32-bit lanes into two 64-bit lanes: widening
+// multiply (vl = 4, the number of source elements), then add the low and
+// high 64x2 halves of the m2 product (vl = 2).
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{
+    vint64m2_t v1 = vundefined_i64m2();
+    v1 = vwmul_vv_i64m2(a.val, b.val, 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2));
+}
+// Accumulating variant: same as above plus c.
+// Fixed: the original passed vl=8 to vwmul and vl=4 to the 64-bit adds,
+// which is inconsistent with the non-accumulating overload (4 and 2) and
+// exceeds the actual element counts.
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    vint64m2_t v1 = vundefined_i64m2();
+    v1 = vwmul_vv_i64m2(a.val, b.val, 4);
+    return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2), c.val, 2));
+}
+
+// 8 >> 32
+// Fast double-widening dot products: like v_dotprod_expand but without the
+// vrgather regrouping — halves of the widened product are summed directly,
+// so the pairing of source lanes is unspecified (allowed for the _fast API).
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4));
+}
+
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4), c.val, 4));
+}
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4));
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4), c.val, 4));
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2));
+}
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2), c.val, 2));
+}
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2));
+}
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4);
+    return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2), c.val, 2));
+}
+
+
+// Widening reductions (v_reduce_sum): vwredsum accumulates all lanes into a
+// single element of the next-wider type; the scalar is read back with vmv_x_s.
+#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(_Tpvec, _Tpvec2, len, scalartype, func, intrin, num) \
+inline scalartype v_reduce_##func(const v_##_Tpvec##x##num& a) \
+{\
+    v##_Tpvec2##m1_t val = vmv_v_x_##len##m1(0, num); \
+    val = intrin(val, a.val, val, num); \
+    return vmv_x_s_##len##m1_##len(val, num); \
+}
+
+
+// Same-width reductions (used for min/max below): the vred* intrinsic leaves
+// the result in element 0, which is returned via direct subscripting.
+#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(_Tpvec, _Tpvec2, scalartype, func, funcu, num) \
+inline scalartype v_reduce_##func(const v_##_Tpvec##x##num& a) \
+{\
+    v##_Tpvec##m1_t val = (v##_Tpvec##m1_t)vmv_v_x_i8m1(0, num); \
+    val = v##funcu##_vs_##_Tpvec2##m1_##_Tpvec2##m1(val, a.val, a.val, num); \
+    return val[0]; \
+}
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int8, int16, i16, int, sum, vwredsum_vs_i8m1_i16m1, 16)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int16, int32, i32, int, sum, vwredsum_vs_i16m1_i32m1, 8)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int32, int64, i64, int, sum, vwredsum_vs_i32m1_i64m1, 4)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint8, uint16, u16, unsigned, sum, vwredsumu_vs_u8m1_u16m1, 16)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint16, uint32, u32, unsigned, sum, vwredsumu_vs_u16m1_u32m1, 8)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint32, uint64, u64, unsigned, sum, vwredsumu_vs_u32m1_u64m1, 4)
+// Scalar sum of all float/double lanes via an ordered FP reduction into
+// element 0.  These are plain inline functions; the stray '\' macro-style
+// line continuations left over in the original have been removed (a trailing
+// '\' on ordinary code is fragile — e.g. it would splice a following line).
+inline float v_reduce_sum(const v_float32x4& a)
+{
+    vfloat32m1_t val = vfmv_v_f_f32m1(0.0, 4);
+    val = vfredsum_vs_f32m1_f32m1(val, a.val, val, 4);
+    return vfmv_f_s_f32m1_f32(val, 4);
+}
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    vfloat64m1_t val = vfmv_v_f_f64m1(0.0, 2);
+    val = vfredsum_vs_f64m1_f64m1(val, a.val, val, 2);
+    return vfmv_f_s_f64m1_f64(val, 2);
+}
+// 64-bit lane sums: only two lanes, so extract both and add as scalars.
+inline uint64 v_reduce_sum(const v_uint64x2& a)
+{ return vext_x_v_u64m1_u64((vuint64m1_t)a.val, 0, 2)+vext_x_v_u64m1_u64((vuint64m1_t)a.val, 1, 2); }
+
+inline int64 v_reduce_sum(const v_int64x2& a)
+{ return vext_x_v_i64m1_i64((vint64m1_t)a.val, 0, 2)+vext_x_v_i64m1_i64((vint64m1_t)a.val, 1, 2); }
+
+// Instantiate v_reduce_max / v_reduce_min for every integer type and f32,
+// mapping to vredmax/vredmin (u-suffixed for unsigned, fred* for float).
+#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(func) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int8,  i8, int, func, red##func, 16) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int16, i16, int, func, red##func, 8) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int32, i32, int, func, red##func, 4) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int64, i64, int, func, red##func, 2) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint8, u8, unsigned, func, red##func##u, 16) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint16, u16, unsigned, func, red##func##u, 8) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint32, u32, unsigned, func, red##func##u, 4) \
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(float32, f32, float, func, fred##func, 4)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(max)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(min)
+
+// Horizontal sums of four vectors packed into one result:
+// result = { sum(a), sum(b), sum(c), sum(d) }.  Each input is reduced
+// independently into lane 0 and the four scalars are repacked.
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    vfloat32m1_t a0 = vfmv_v_f_f32m1(0.0, 4);
+    vfloat32m1_t b0 = vfmv_v_f_f32m1(0.0, 4);
+    vfloat32m1_t c0 = vfmv_v_f_f32m1(0.0, 4);
+    vfloat32m1_t d0 = vfmv_v_f_f32m1(0.0, 4);
+    a0 = vfredsum_vs_f32m1_f32m1(a0, a.val, a0, 4);
+    b0 = vfredsum_vs_f32m1_f32m1(b0, b.val, b0, 4);
+    c0 = vfredsum_vs_f32m1_f32m1(c0, c.val, c0, 4);
+    d0 = vfredsum_vs_f32m1_f32m1(d0, d.val, d0, 4);
+    return v_float32x4(a0[0], b0[0], c0[0], d0[0]);
+}
+
+// Sum of absolute differences: |a-b| per lane via a masked reverse-subtract
+// (0 - x for negative lanes), then an FP reduction of the result.
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    vfloat32m1_t a0 = vfmv_v_f_f32m1(0.0, 4);
+    vfloat32m1_t x = vfsub_vv_f32m1(a.val, b.val, 4);
+    vbool32_t mask=vmflt_vf_f32m1_b32(x, 0, 4);
+    vfloat32m1_t val = vfrsub_vf_f32m1_m(mask, x, x, 0, 4);
+    a0 = vfredsum_vs_f32m1_f32m1(a0, val, a0, 4);
+    return a0[0];
+}
+
+// Integer SAD: compose the (unsigned-result) v_absdiff with v_reduce_sum.
+// _Tpvec2 is the unsigned vector type produced by v_absdiff for _Tpvec.
+#define OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(_Tpvec, _Tpvec2) \
+inline unsigned v_reduce_sad(const _Tpvec& a, const _Tpvec&b){ \
+    _Tpvec2 x = v_absdiff(a, b); \
+    return v_reduce_sum(x); \
+}
+
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int8x16, v_uint8x16)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint8x16, v_uint8x16)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int16x8, v_uint16x8)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint16x8, v_uint16x8)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int32x4, v_uint32x4)
+OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint32x4, v_uint32x4)
+
+// Integer comparison operators.  Each comparison produces a vbool mask, which
+// is turned into the universal-intrinsics mask convention (all-ones for true,
+// zero for false) by merging -1 into a zero vector under the mask.
+// 'uv' selects the signed (_vv_) vs unsigned (u_vv_) compare intrinsic;
+// > and >= are implemented by swapping the operands of < and <=.
+#define OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(_Tpvec, _Tp, _T, num, uv) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmseq_vv_##_Tp##_b##_T(a.val, b.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmsne_vv_##_Tp##_b##_T(a.val, b.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmslt##uv##_Tp##_b##_T(a.val, b.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmslt##uv##_Tp##_b##_T(b.val, a.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmsle##uv##_Tp##_b##_T(a.val, b.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    vbool##_T##_t mask = vmsle##uv##_Tp##_b##_T(b.val, a.val, num); \
+    return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \
+} \
+
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int8x16, i8m1, 8, 16, _vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int16x8, i16m1, 16, 8, _vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int32x4, i32m1, 32, 4, _vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int64x2, i64m1, 64, 2, _vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint8x16, u8m1, 8, 16, u_vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint16x8, u16m1, 16, 8, u_vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint32x4, u32m1, 32, 4, u_vv_)
+OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint64x2, u64m1, 64, 2, u_vv_)
+
+// Float comparisons: vmf* produces a lane mask; merging -1 into a zero vector
+// under that mask and reinterpreting as float yields the all-ones/zero mask
+// convention used by the universal intrinsics.
+// Note: vmv_v_x_i32m1 takes an integer scalar, so the splat constant is 0
+// (the original passed the double literal 0.0, relying on implicit
+// floating-to-integer conversion).
+inline v_float32x4 operator == (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmfeq_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+inline v_float32x4 operator != (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmfne_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+inline v_float32x4 operator < (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmflt_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+inline v_float32x4 operator <= (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmfle_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+inline v_float32x4 operator > (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmfgt_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+inline v_float32x4 operator >= (const v_float32x4& a, const v_float32x4& b)
+{
+    vbool32_t mask = vmfge_vv_f32m1_b32(a.val, b.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+// Ordered-compare of a with itself: true (all-ones) exactly for non-NaN lanes.
+inline v_float32x4 v_not_nan(const v_float32x4& a)
+{
+    vbool32_t mask = vmford_vv_f32m1_b32(a.val, a.val, 4);
+    vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0, 4), -1, 4);
+    return v_float32x4((vfloat32m1_t)res);
+}
+
+// Double-precision comparisons: same mask-then-merge scheme as the f32
+// operators above (vl = 2).  The splat constant for vmv_v_x_i64m1 is the
+// integer 0 (originally the double literal 0.0, implicitly converted).
+inline v_float64x2 operator == (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmfeq_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+inline v_float64x2 operator != (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmfne_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+inline v_float64x2 operator < (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmflt_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+inline v_float64x2 operator <= (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmfle_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+inline v_float64x2 operator > (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmfgt_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+inline v_float64x2 operator >= (const v_float64x2& a, const v_float64x2& b)
+{
+    vbool64_t mask = vmfge_vv_f64m1_b64(a.val, b.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+// Ordered-compare of a with itself: true exactly for non-NaN lanes.
+inline v_float64x2 v_not_nan(const v_float64x2& a)
+{
+    vbool64_t mask = vmford_vv_f64m1_b64(a.val, a.val, 2);
+    vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0, 2), -1, 2);
+    return v_float64x2((vfloat64m1_t)res);
+}
+// 4x4 matrix transpose of 32-bit lanes: pack the four rows into one m4
+// register group, apply a single column-major vrgather permutation, then
+// split the group back into the four output rows.
+#define OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(_Tp, _T) \
+inline void v_transpose4x4(const v_##_Tp##32x4& a0, const v_##_Tp##32x4& a1, \
+                         const v_##_Tp##32x4& a2, const v_##_Tp##32x4& a3, \
+                         v_##_Tp##32x4& b0, v_##_Tp##32x4& b1, \
+                         v_##_Tp##32x4& b2, v_##_Tp##32x4& b3) \
+{ \
+    v##_Tp##32m4_t val = vundefined_##_T##m4();    \
+    val = vset_##_T##m4(val, 0, a0.val);    \
+    val = vset_##_T##m4(val, 1, a1.val);    \
+    val = vset_##_T##m4(val, 2, a2.val);    \
+    val = vset_##_T##m4(val, 3, a3.val);   \
+    val = vrgather_vv_##_T##m4(val, (vuint32m4_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16);    \
+    b0.val = vget_##_T##m4_##_T##m1(val, 0);   \
+    b1.val = vget_##_T##m4_##_T##m1(val, 1);   \
+    b2.val = vget_##_T##m4_##_T##m1(val, 2);   \
+    b3.val = vget_##_T##m4_##_T##m1(val, 3);   \
+}
+OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(uint, u32)
+OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(int, i32)
+OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(float, f32)
+
+
+// Element shifts: runtime operator<< / operator>> take the shift amount as a
+// plain int; v_shl / v_shr / v_rshr take it as the compile-time template
+// parameter n.  v_rshr rounds by adding 1<<(n-1) before shifting.
+// Fixed: the templates were declared as bare "template inline" (invalid
+// C++ — the "<int n>" parameter list had been lost); it is restored here.
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_LEFT(_Tpvec, suffix, _T, num) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec((vsll_vx_##_T##m1(a.val, n, num))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec((vsll_vx_##_T##m1(a.val, n, num))); }
+
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_RIGHT(_Tpvec, suffix, _T, num, intric) \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(a.val, n, num))); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(a.val, n, num))); }\
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(vadd_vx_##_T##m1(a.val, 1<<(n-1), num), n, num))); }
+
+// trade efficiency for convenience
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(suffix, _T, num, intrin) \
+OPENCV_HAL_IMPL_RISCVV_SHIFT_LEFT(v_##suffix##x##num, suffix, _T, num) \
+OPENCV_HAL_IMPL_RISCVV_SHIFT_RIGHT(v_##suffix##x##num, suffix, _T, num, intrin)
+
+// Unsigned types use a logical right shift (srl), signed an arithmetic (sra).
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint8, u8, 16, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint16, u16, 8, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint32, u32, 4, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint64, u64, 2, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int8, i8, 16, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int16, i16, 8, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int32, i32, 4, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int64, i64, 2, sra)
+
+#if 0
+#define VUP4(n) {0, 1, 2, 3}
+#define VUP8(n) {0, 1, 2, 3, 4, 5, 6, 7}
+#define VUP16(n) {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+#define VUP2(n) {0, 1}
+#endif
+// Lane rotations by the compile-time amount n.  Single-vector forms shift
+// lanes in zeros; two-vector forms concatenate a and b in an m2 group and
+// slide, so lanes shifted out of one vector come from the other.
+// Fixed: the template declarations were garbled to bare "template inline"
+// (invalid C++ — "<int n>" lost); the parameter list is restored.  The
+// existing "template<>" n==0 full specializations are unchanged.
+#define OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(_Tpvec, suffix, _T, num, num2, vmv, len) \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{    \
+    suffix##m1_t tmp = vmv##_##_T##m1(0, num);\
+        tmp = vslideup_vx_##_T##m1_m(vmset_m_##len(num), tmp, a.val, n, num);\
+        return _Tpvec(tmp);\
+} \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{     \
+        return _Tpvec(vslidedown_vx_##_T##m1(a.val, n, num));\
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ return a; } \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    suffix##m2_t tmp = vundefined_##_T##m2();    \
+    tmp = vset_##_T##m2(tmp, 0, a.val);          \
+    tmp = vset_##_T##m2(tmp, 1, b.val);          \
+        tmp = vslidedown_vx_##_T##m2(tmp, n, num2);\
+        return _Tpvec(vget_##_T##m2_##_T##m1(tmp, 0));\
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    suffix##m2_t tmp = vundefined_##_T##m2();    \
+    tmp = vset_##_T##m2(tmp, 0, b.val);    \
+    tmp = vset_##_T##m2(tmp, 1, a.val);    \
+        tmp = vslideup_vx_##_T##m2(tmp, n, num2);\
+        return _Tpvec(vget_##_T##m2_##_T##m1(tmp, 1));\
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    CV_UNUSED(b); return a; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint8x16, vuint8, u8, 16, 32, vmv_v_x, b8)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int8x16, vint8, i8, 16, 32, vmv_v_x, b8)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint16x8, vuint16, u16, 8, 16, vmv_v_x, b16)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int16x8, vint16, i16, 8, 16, vmv_v_x, b16)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint32x4, vuint32, u32, 4, 8, vmv_v_x, b32)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int32x4, vint32, i32, 4, 8, vmv_v_x, b32)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint64x2, vuint64, u64, 2, 4, vmv_v_x, b64)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int64x2, vint64, i64, 2, 4, vmv_v_x, b64)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_float32x4, vfloat32, f32, 4, 8, vfmv_v_f, b32)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_float64x2, vfloat64, f64, 2, 4, vfmv_v_f, b64)
+
+// Load/store operations.  'num' is the full lane count, 'hnum' the half
+// count.  v_load_halves reads each half as one unaligned 64-bit word.
+// Note: aligned and unaligned variants map to the same vle/vse intrinsics
+// (RVV unit-stride loads have no alignment-specific form).
+#define OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(_Tpvec, _Tp, _Tp2, len, hnum, num) \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+  typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \
+  vuint64m1_t tmp = {*(unaligned_uint64*)ptr0, *(unaligned_uint64*)ptr1};\
+    return _Tpvec(_Tp2##_t(tmp)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(vle_v_##len(ptr, hnum)); }\
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(vle_v_##len(ptr, num)); } \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec((_Tp2##_t)vle_v_##len((const _Tp *)ptr, num)); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ vse_v_##len(ptr, a.val, hnum);}\
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+  _Tp2##_t a0 = vslidedown_vx_##len(a.val, hnum, num);    \
+  vse_v_##len(ptr, a0, hnum);}\
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ vse_v_##len(ptr, a.val, num); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ vse_v_##len(ptr, a.val, num); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ vse_v_##len(ptr, a.val, num); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
+{ vse_v_##len(ptr, a.val, num); }
+
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint8x16, uchar, vuint8m1, u8m1, 8, 16)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int8x16, schar, vint8m1, i8m1, 8, 16)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint16x8, ushort, vuint16m1, u16m1, 4, 8)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int16x8, short, vint16m1, i16m1, 4, 8)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint32x4, unsigned, vuint32m1, u32m1, 2, 4)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int32x4, int, vint32m1, i32m1, 2, 4)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint64x2, unsigned long, vuint64m1, u64m1, 1, 2)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int64x2, long, vint64m1, i64m1, 1, 2)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_float32x4, float, vfloat32m1, f32m1, 2, 4)
+OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_float64x2, double, vfloat64m1, f64m1, 1, 2)
+
+
+////////////// Lookup table access ////////////////////
+
+// 8-bit table lookup: gather 16 scalars through the index array into an
+// aligned buffer, then load it as one vector.  The disabled #else branch is
+// an indexed-load (vlx) alternative kept for reference.
+inline v_int8x16 v_lut(const schar* tab, const int* idx)
+{
+#if 1
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[ 0]],
+        tab[idx[ 1]],
+        tab[idx[ 2]],
+        tab[idx[ 3]],
+        tab[idx[ 4]],
+        tab[idx[ 5]],
+        tab[idx[ 6]],
+        tab[idx[ 7]],
+        tab[idx[ 8]],
+        tab[idx[ 9]],
+        tab[idx[10]],
+        tab[idx[11]],
+        tab[idx[12]],
+        tab[idx[13]],
+        tab[idx[14]],
+        tab[idx[15]]
+    };
+    return v_int8x16(vle_v_i8m1(elems, 16));
+#else
+    int32xm4_t index32 = vlev_int32xm4(idx, 16);
+    vint16m2_t index16 = vnsra_vx_i16m2_int32xm4(index32, 0, 16);
+    vint8m1_t index = vnsra_vx_i8m1_i16m2(index16, 0, 16);
+    return v_int8x16(vlxbv_i8m1(tab, index, 16));
+#endif
+}
+
+// Lookup of 8 consecutive pairs: idx[i] selects tab[idx[i]] and tab[idx[i]+1].
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx){
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[4]],
+        tab[idx[4] + 1],
+        tab[idx[5]],
+        tab[idx[5] + 1],
+        tab[idx[6]],
+        tab[idx[6] + 1],
+        tab[idx[7]],
+        tab[idx[7] + 1]
+    };
+    return v_int8x16(vle_v_i8m1(elems, 16));
+}
+// Lookup of 4 consecutive quads: idx[i] selects tab[idx[i]] .. tab[idx[i]+3].
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[0] + 2],
+        tab[idx[0] + 3],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[1] + 2],
+        tab[idx[1] + 3],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[2] + 2],
+        tab[idx[2] + 3],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[3] + 2],
+        tab[idx[3] + 3]
+    };
+    return v_int8x16(vle_v_i8m1(elems, 16));
+}
+
+// Unsigned 8-bit lookups: reuse the signed versions and reinterpret the bits.
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
+
+// Gather 8 shorts tab[idx[i]] via an aligned scalar temporary + vector load.
+inline v_int16x8 v_lut(const short* tab, const int* idx)
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]],
+        tab[idx[4]],
+        tab[idx[5]],
+        tab[idx[6]],
+        tab[idx[7]]
+    };
+    return v_int16x8(vle_v_i16m1(elems, 8));
+}
+// Gather 4 adjacent short pairs: lanes are tab[idx[i]], tab[idx[i]+1].
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1]
+    };
+    return v_int16x8(vle_v_i16m1(elems, 8));
+}
+// Gather 2 adjacent short quadruples: lanes are tab[idx[i]] .. tab[idx[i]+3].
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[0] + 2],
+        tab[idx[0] + 3],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[1] + 2],
+        tab[idx[1] + 3]
+    };
+    return v_int16x8(vle_v_i16m1(elems, 8));
+}
+// Unsigned variants: delegate to the signed gathers and reinterpret the bits.
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); }
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); }
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); }
+
+// Gather 4 ints tab[idx[i]] via an aligned scalar temporary + vector load.
+inline v_int32x4 v_lut(const int* tab, const int* idx)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_int32x4(vle_v_i32m1(elems, 4));
+}
+// Gather 2 adjacent int pairs: lanes are tab[idx[i]], tab[idx[i]+1].
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1]
+    };
+    return v_int32x4(vle_v_i32m1(elems, 4));
+}
+// One quad uses only idx[0]: the 4 lanes are contiguous, so a plain load works.
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
+{
+    return v_int32x4(vle_v_i32m1(tab+idx[0], 4));
+}
+// Unsigned variants: delegate to the signed gathers and reinterpret the bits.
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); }
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); }
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); }
+
+// 64-bit gathers: only two lanes, so a GNU vector initializer is used
+// directly instead of a temporary array.
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
+{
+    vint64m1_t res = {tab[idx[0]], tab[idx[1]]};
+    return v_int64x2(res);
+}
+// A pair is contiguous starting at idx[0], so a plain unit-stride load works.
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
+{
+    return v_int64x2(vle_v_i64m1(tab+idx[0], 2));
+}
+
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx)
+{
+    vuint64m1_t res = {tab[idx[0]], tab[idx[1]]};
+    return v_uint64x2(res);
+}
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx)
+{
+    return v_uint64x2(vle_v_u64m1(tab+idx[0], 2));
+}
+
+// Float gathers: same pattern as the integer versions above.
+inline v_float32x4 v_lut(const float* tab, const int* idx)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[0]+1],
+        tab[idx[1]],
+        tab[idx[1]+1]
+    };
+    return v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx)
+{
+    return v_float32x4(vle_v_f32m1(tab + idx[0], 4));
+}
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{
+    vfloat64m1_t res = {tab[idx[0]], tab[idx[1]]};
+    return v_float64x2(res);
+}
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{
+    return v_float64x2(vle_v_f64m1(tab+idx[0], 2));
+}
+
+// LUT overloads taking the indices in a vector register; indices are read
+// back lane-by-lane via .val[] and gathered through a scalar temporary.
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_int32x4(vle_v_i32m1(elems, 4));
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
+{
+    unsigned CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_uint32x4(vle_v_u32m1(elems, 4));
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    vfloat64m1_t res = {tab[idxvec.val[0]], tab[idxvec.val[1]]};
+    return v_float64x2(res);
+}
+// Gather interleaved (x,y) float pairs: x[i] = tab[idx[i]], y[i] = tab[idx[i]+1],
+// using hardware indexed loads.  The *4 scaling suggests vlxe_v_f32m1 takes
+// byte offsets rather than element indices — TODO confirm against the
+// intrinsic spec this header targets.
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    vint32m1_t index_x = vmul_vx_i32m1(idxvec.val, 4, 4);
+    vint32m1_t index_y = vadd_vx_i32m1(index_x, 4, 4);
+
+    x.val = vlxe_v_f32m1(tab, index_x, 4);
+    y.val = vlxe_v_f32m1(tab, index_y, 4);
+}
+
+// Double variant: only two pairs, gathered with scalar loads.
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    x = v_float64x2(tab[idx[0]], tab[idx[1]]);
+    y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]);
+}
+
+// Narrowing pack: concatenate two wide vectors (a low, b high) into an m2
+// register group, then narrow with a saturating (vnclip/vnclipu) or plain
+// (vnsra/vnsrl) right shift.  v_rshr_* variants shift by the template
+// parameter n with rounding/saturation; *_store variants write only the
+// low half (num2 lanes) to memory, padding the upper group half with zeros.
+// FIX(review): the `template <int n>` parameter lists had been lost from
+// the two v_rshr_* definitions (bodies reference `n`); restored.
+#define OPENCV_HAL_IMPL_RISCVV_PACKS(_Tp, _Tp2, _T2, num2, _T1, num, intrin, shr, _Type) \
+inline v_##_Tp##x##num v_pack(const v_##_Tp2##x##num2& a, const v_##_Tp2##x##num2& b) \
+{ \
+    v##_Tp2##m2_t  tmp = vundefined_##_T2##m2();    \
+    tmp = vset_##_T2##m2(tmp, 0, a.val);    \
+    tmp = vset_##_T2##m2(tmp, 1, b.val);    \
+    return v_##_Tp##x##num(shr##_##_T1##m1(tmp, 0, num)); \
+}\
+template <int n> inline \
+v_##_Tp##x##num v_rshr_pack(const v_##_Tp2##x##num2& a, const v_##_Tp2##x##num2& b) \
+{ \
+    v##_Tp2##m2_t  tmp = vundefined_##_T2##m2();    \
+    tmp = vset_##_T2##m2(tmp, 0, a.val);    \
+    tmp = vset_##_T2##m2(tmp, 1, b.val);    \
+    return v_##_Tp##x##num(intrin##_##_T1##m1(tmp, n, num)); \
+}\
+inline void v_pack_store(_Type* ptr, const v_##_Tp2##x##num2& a) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2();    \
+    tmp = vset_##_T2##m2(tmp, 0, a.val);    \
+    tmp = vset_##_T2##m2(tmp, 1, vmv_v_x_##_T2##m1(0, num2));    \
+    asm("" ::: "memory");    \
+    vse_v_##_T1##m1(ptr, shr##_##_T1##m1(tmp, 0, num), num2); \
+}\
+template <int n> inline \
+void v_rshr_pack_store(_Type* ptr, const v_##_Tp2##x##num2& a) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2();    \
+    tmp = vset_##_T2##m2(tmp, 0, a.val);    \
+    tmp = vset_##_T2##m2(tmp, 1, vmv_v_x_##_T2##m1(0, num2));    \
+    vse_v_##_T1##m1(ptr, intrin##_##_T1##m1(tmp, n, num), num2); \
+}
+OPENCV_HAL_IMPL_RISCVV_PACKS(int8, int16, i16, 8, i8, 16, vnclip_vx, vnclip_vx, signed char)
+OPENCV_HAL_IMPL_RISCVV_PACKS(int16, int32, i32, 4, i16, 8, vnclip_vx, vnclip_vx, signed short)
+OPENCV_HAL_IMPL_RISCVV_PACKS(int32, int64, i64, 2, i32, 4, vnclip_vx, vnsra_vx, int)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint8, uint16, u16, 8, u8, 16, vnclipu_vx, vnclipu_vx, unsigned char)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint16, uint32, u32, 4, u16, 8, vnclipu_vx, vnclipu_vx, unsigned short)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint32, uint64, u64, 2, u32, 4, vnclipu_vx, vnsrl_vx, unsigned int)
+
+// pack boolean
+// Pack boolean masks by truncating each wide lane to 8 bits (narrowing
+// shift-right by 0 at each step).
+// FIX(review): removed stray trailing '\' line-continuations left over from
+// a macro copy-paste; they spliced statements onto one logical line and
+// served no purpose inside these plain inline functions.
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vuint16m2_t tmp = vundefined_u16m2();
+    tmp = vset_u16m2(tmp, 0, a.val);
+    tmp = vset_u16m2(tmp, 1, b.val);
+    return v_uint8x16(vnsrl_vx_u8m1(tmp, 0, 16));
+}
+
+// 32-bit masks: narrow u32m4 -> u16m2 -> u8m1.
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    vuint32m4_t vabcd = vundefined_u32m4();
+    vuint16m2_t v16 = vundefined_u16m2();
+    vabcd = vset_u32m4(vabcd, 0, a.val);
+    vabcd = vset_u32m4(vabcd, 1, b.val);
+    vabcd = vset_u32m4(vabcd, 2, c.val);
+    vabcd = vset_u32m4(vabcd, 3, d.val);
+    v16 = vnsrl_vx_u16m2(vabcd, 0, 16);
+    return v_uint8x16(vnsrl_vx_u8m1(v16, 0, 16));
+}
+
+// 64-bit masks: narrow u64m8 -> u32m4 -> u16m2 -> u8m1.
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    vuint64m8_t v64 = vundefined_u64m8();
+    vuint32m4_t v32 = vundefined_u32m4();
+    vuint16m2_t v16 = vundefined_u16m2();
+    v64 = vset_u64m8(v64, 0, a.val);
+    v64 = vset_u64m8(v64, 1, b.val);
+    v64 = vset_u64m8(v64, 2, c.val);
+    v64 = vset_u64m8(v64, 3, d.val);
+    v64 = vset_u64m8(v64, 4, e.val);
+    v64 = vset_u64m8(v64, 5, f.val);
+    v64 = vset_u64m8(v64, 6, g.val);
+    v64 = vset_u64m8(v64, 7, h.val);
+    v32 = vnsrl_vx_u32m4(v64, 0, 16);
+    v16 = vnsrl_vx_u16m2(v32, 0, 16);
+    return v_uint8x16(vnsrl_vx_u8m1(v16, 0, 16));
+}
+
+// Signed -> unsigned narrowing pack: clamp negatives to 0 with vmax_vx,
+// then narrow with an unsigned saturating shift (vnclipu).  v_rshr_*
+// variants shift by the template parameter n; *_store variants write the
+// narrowed low half (num2 lanes) to memory.
+// FIX(review): removed a dead commented-out draft of v_pack_u that used
+// obsolete pre-0.7.1 intrinsic names, and restored the `template <int n>`
+// parameter lists that had been lost from the v_rshr_* definitions
+// (bodies reference `n`).
+#define OPENCV_HAL_IMPL_RISCVV_PACK_U(tp1, num1, tp2, num2, _Tp) \
+inline v_uint##tp1##x##num1 v_pack_u(const v_int##tp2##x##num2& a, const v_int##tp2##x##num2& b) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2();    \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val);    \
+    tmp = vset_##i##tp2##m2(tmp, 1, b.val);    \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return v_uint##tp1##x##num1(vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, 0, num1));    \
+} \
+inline void v_pack_u_store(_Tp* ptr, const v_int##tp2##x##num2& a) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2();    \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val);    \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return vse_v_u##tp1##m1(ptr, vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, 0, num1), num2);    \
+} \
+template <int n> inline \
+v_uint##tp1##x##num1 v_rshr_pack_u(const v_int##tp2##x##num2& a, const v_int##tp2##x##num2& b) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2();    \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val);    \
+    tmp = vset_##i##tp2##m2(tmp, 1, b.val);    \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return v_uint##tp1##x##num1(vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, n, num1));    \
+} \
+template <int n> inline \
+void v_rshr_pack_u_store(_Tp* ptr, const v_int##tp2##x##num2& a) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2();    \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val);    \
+    vint##tp2##m2_t val_ = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    vuint##tp1##m1_t val = vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val_, n, num1);    \
+    return vse_v_u##tp1##m1(ptr, val, num2);\
+}
+OPENCV_HAL_IMPL_RISCVV_PACK_U(8, 16, 16, 8, unsigned char )
+OPENCV_HAL_IMPL_RISCVV_PACK_U(16, 8, 32, 4, unsigned short)
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+// v_mul_expand's outputs are fully written before v_pack reads them, but the
+// compiler cannot always prove it; silence the false positive.
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#endif
+
+// saturating multiply 8-bit, 16-bit
+// Multiply into the double-width type, then narrow back with saturation.
+#define OPENCV_HAL_IMPL_RISCVV_MUL_SAT(_Tpvec, _Tpwvec)            \
+    inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)    \
+    {                                                              \
+        _Tpwvec c, d;                                              \
+        v_mul_expand(a, b, c, d);                                  \
+        return v_pack(c, d);                                       \
+    }                                                              \
+    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)        \
+    { a = a * b; return a; }
+
+OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_int8x16,  v_int16x8)
+OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_uint8x16, v_uint16x8)
+OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_int16x8,  v_int32x4)
+OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_uint16x8, v_uint32x4)
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+// Bit-count lookup table: popCountTable[i] == number of set bits in i.
+static const signed char popCountTable[256] =
+{
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+
+// Per-byte popcount: popcount(v) == popcount(v >> 1) + (v & 1).
+// The gather (vlxe) indexes the table with v >> 1, so only entries 0..127
+// are ever reached here.
+inline vuint8m1_t vcnt_u8(vuint8m1_t val){
+    vuint8m1_t v0 = val & 1;
+    return vlxe_v_u8m1((unsigned char*)popCountTable, val >> 1, 16)+v0;
+}
+
+// Per-lane popcount: 8-bit lanes use the table-based byte popcount directly.
+inline v_uint8x16
+v_popcount(const v_uint8x16& a)
+{
+    return v_uint8x16(vcnt_u8(a.val));
+}
+
+inline v_uint8x16
+v_popcount(const v_int8x16& a)
+{
+    return v_uint8x16(vcnt_u8((vuint8m1_t)a.val));
+}
+
+// 16-bit lanes: count bits per byte, use vrgather to split even/odd bytes of
+// each lane into the two m2 halves, then add the halves with a widening add.
+// The 64-bit constants are byte-index shuffle masks viewed as vuint8m2.
+inline v_uint16x8
+v_popcount(const v_uint16x8& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0x0E0C0A0806040200, 0, 0x0F0D0B0907050301, 0};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint16m2_t res = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 8);
+    return v_uint16x8(vget_u16m2_u16m1(res, 0));
+}
+
+inline v_uint16x8
+v_popcount(const v_int16x8& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0x0E0C0A0806040200, 0, 0x0F0D0B0907050301, 0};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint16m2_t res = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 8);
+    return v_uint16x8(vget_u16m2_u16m1(res, 0));
+}
+
+// 32-bit lanes: gather the 4 bytes of each lane into separate groups
+// (0xFF.. picks zero/out-of-range bytes), then reduce with two widening adds.
+inline v_uint32x4
+v_popcount(const v_uint32x4& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0xFFFFFFFF0C080400, 0xFFFFFFFF0D090501,
+                                     0xFFFFFFFF0E0A0602, 0xFFFFFFFF0F0B0703};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint16m2_t res_ = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 16);
+    vuint32m2_t res  = vwaddu_vv_u32m2(vget_u16m2_u16m1(res_, 0), vget_u16m2_u16m1(res_, 1), 8);
+    return v_uint32x4(vget_u32m2_u32m1(res, 0));
+}
+
+inline v_uint32x4
+v_popcount(const v_int32x4& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0xFFFFFFFF0C080400, 0xFFFFFFFF0D090501,
+                                     0xFFFFFFFF0E0A0602, 0xFFFFFFFF0F0B0703};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint16m2_t res_ = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 16);
+    vuint32m2_t res  = vwaddu_vv_u32m2(vget_u16m2_u16m1(res_, 0), vget_u16m2_u16m1(res_, 1), 8);
+    return v_uint32x4(vget_u32m2_u32m1(res, 0));
+}
+
+// 64-bit lanes: gather each lane's 8 byte-counts into its own m1 half,
+// then sum each half with a vector reduction.
+inline v_uint64x2
+v_popcount(const v_uint64x2& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0x0706050403020100, 0x0000000000000000,
+                                     0x0F0E0D0C0B0A0908, 0x0000000000000000};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint8m1_t zero = vmv_v_x_u8m1(0, 16);
+    vuint8m1_t res1 = zero;
+    vuint8m1_t res2 = zero;
+    res1 = vredsum_vs_u8m1_u8m1(res1, vget_u8m2_u8m1(tmp, 0), zero, 8);
+    res2 = vredsum_vs_u8m1_u8m1(res2, vget_u8m2_u8m1(tmp, 1), zero, 8);
+
+    return v_uint64x2((unsigned long)vmv_x_s_u8m1_u8(res1, 8), (unsigned long)vmv_x_s_u8m1_u8(res2, 8));
+}
+
+inline v_uint64x2
+v_popcount(const v_int64x2& a)
+{
+    vuint8m2_t tmp = vundefined_u8m2();
+    tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val));
+    vuint64m2_t mask = (vuint64m2_t){0x0706050403020100, 0x0000000000000000,
+                                     0x0F0E0D0C0B0A0908, 0x0000000000000000};
+    tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32);    \
+    vuint8m1_t zero = vmv_v_x_u8m1(0, 16);
+    vuint8m1_t res1 = zero;
+    vuint8m1_t res2 = zero;
+    res1 = vredsum_vs_u8m1_u8m1(res1, vget_u8m2_u8m1(tmp, 0), zero, 8);
+    res2 = vredsum_vs_u8m1_u8m1(res2, vget_u8m2_u8m1(tmp, 1), zero, 8);
+
+    return v_uint64x2((unsigned long)vmv_x_s_u8m1_u8(res1, 8), (unsigned long)vmv_x_s_u8m1_u8(res2, 8));
+}
+
+// Per-lane powers of two used as sign-mask weights (bit i of the result
+// corresponds to lane i).
+#define SMASK 1, 2, 4, 8, 16, 32, 64, 128
+// v_signmask: extract each lane's sign bit, multiply by its lane weight,
+// and reduce-sum the weights into a scalar bitmask.  For 16 byte lanes the
+// weights repeat, so the upper half is additionally scaled by 256 before
+// summation.
+inline int v_signmask(const v_uint8x16& a)
+{
+    vuint8m1_t t0  = vsrl_vx_u8m1(a.val, 7, 16);
+    vuint8m1_t m1  = (vuint8m1_t){SMASK, SMASK};
+    vuint16m2_t t1 = vwmulu_vv_u16m2(t0, m1, 16);
+    vuint32m1_t res = vmv_v_x_u32m1(0, 4);
+    vuint32m2_t t2  = vwmulu_vx_u32m2(vget_u16m2_u16m1(t1, 1), 256, 8);
+    res = vredsum_vs_u32m2_u32m1(res, t2, res, 8);
+    res = vwredsumu_vs_u16m1_u32m1(res, vget_u16m2_u16m1(t1, 0), res, 8);
+    return vmv_x_s_u32m1_u32(res, 8);
+}
+inline int v_signmask(const v_int8x16& a)
+{
+    vuint8m1_t t0  = vsrl_vx_u8m1((vuint8m1_t)a.val, 7, 16);
+    vuint8m1_t m1  = (vuint8m1_t){SMASK, SMASK};
+    vint16m2_t t1 = (vint16m2_t)vwmulu_vv_u16m2(t0, m1, 16);
+    vint32m1_t res = vmv_v_x_i32m1(0, 4);
+    vint32m2_t t2  = vwmul_vx_i32m2(vget_i16m2_i16m1(t1, 1), 256, 8);
+    res = vredsum_vs_i32m2_i32m1(res, t2, res, 8);
+    res = vwredsum_vs_i16m1_i32m1(res, vget_i16m2_i16m1(t1, 0), res, 8);
+    return vmv_x_s_i32m1_i32(res, 8);
+}
+
+// 8 lanes: weights fit in one vector, single multiply + reduce.
+inline int v_signmask(const v_int16x8& a)
+{
+    vint16m1_t t0 = (vint16m1_t)vsrl_vx_u16m1((vuint16m1_t)a.val, 15, 8);
+    vint16m1_t m1 = (vint16m1_t){SMASK};
+    vint16m1_t t1 = vmul_vv_i16m1(t0, m1, 8);
+    vint16m1_t res = vmv_v_x_i16m1(0, 8);
+    res = vredsum_vs_i16m1_i16m1(res, t1, res, 8);
+    return vmv_x_s_i16m1_i16(res, 8);
+}
+inline int v_signmask(const v_uint16x8& a)
+{
+    vint16m1_t t0 = (vint16m1_t)vsrl_vx_u16m1((vuint16m1_t)a.val, 15, 8);
+    vint16m1_t m1 = (vint16m1_t){SMASK};
+    vint16m1_t t1 = vmul_vv_i16m1(t0, m1, 8);
+    vint16m1_t res = vmv_v_x_i16m1(0, 8);
+    res = vredsum_vs_i16m1_i16m1(res, t1, res, 8);
+    return vmv_x_s_i16m1_i16(res, 8);
+}
+inline int v_signmask(const v_int32x4& a)
+{
+    vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1((vuint32m1_t)a.val, 31, 4);
+    vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8};
+    vint32m1_t res = vmv_v_x_i32m1(0, 4);
+    vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4);
+    res = vredsum_vs_i32m1_i32m1(res, t1, res, 4);
+    return vmv_x_s_i32m1_i32(res, 4);
+}
+inline int v_signmask(const v_uint32x4& a)
+{
+    vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1(a.val, 31, 4);
+    vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8};
+    vint32m1_t res = vmv_v_x_i32m1(0, 4);
+    vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4);
+    res = vredsum_vs_i32m1_i32m1(res, t1, res, 4);
+    return vmv_x_s_i32m1_i32(res, 4);
+}
+// 2 lanes: extract the two sign bits with scalar moves and combine.
+inline int v_signmask(const v_uint64x2& a)
+{
+    vuint64m1_t v0 = vsrl_vx_u64m1(a.val, 63, 2);
+    int res = (int)vext_x_v_u64m1_u64(v0, 0, 2) + ((int)vext_x_v_u64m1_u64(v0, 1, 2) << 1);
+    return res;
+}
+inline int v_signmask(const v_int64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+inline int v_signmask(const v_float32x4& a)
+{
+    vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1((vuint32m1_t)a.val, 31, 4);
+    vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8};
+    vint32m1_t res = vmv_v_x_i32m1(0, 4);
+    vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4);
+    res = vredsum_vs_i32m1_i32m1(res, t1, res, 4);
+    return vmv_x_s_i32m1_i32(res, 4);
+}
+
+// v_scan_forward: index of the first lane whose mask/sign bit is set,
+// or 0 when no lane is set.  All overloads share the same shape: compute
+// the scalar sign mask, then take the count of trailing zero bits.
+inline int v_scan_forward(const v_int8x16& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_uint8x16& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_int16x8& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_uint16x8& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_int32x4& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_uint32x4& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_float32x4& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_int64x2& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+inline int v_scan_forward(const v_uint64x2& a)
+{
+    const int mask = v_signmask(a);
+    return mask ? trailingZeros32(mask) : 0;
+}
+
+// v_check_all / v_check_any: shift each lane down to its sign bit (inverting
+// first for check_all), view the register as two 64-bit words and test that
+// their OR is zero / non-zero.
+#define OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(_Tpvec, suffix, _T, shift, num) \
+inline bool v_check_all(const v_##_Tpvec& a) \
+{ \
+    suffix##m1_t v0 = vsrl_vx_##_T(vnot_v_##_T(a.val, num), shift, num); \
+    vuint64m1_t v1 = vuint64m1_t(v0); \
+    return (v1[0] | v1[1]) == 0; \
+} \
+inline bool v_check_any(const v_##_Tpvec& a) \
+{ \
+    suffix##m1_t v0 = vsrl_vx_##_T(a.val, shift, num); \
+    vuint64m1_t v1 = vuint64m1_t(v0); \
+    return (v1[0] | v1[1]) != 0; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint8x16, vuint8, u8m1, 7, 16)
+OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint16x8, vuint16, u16m1, 15, 8)
+OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint32x4, vuint32, u32m1, 31, 4)
+OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint64x2, vuint64, u64m1, 63, 2)
+
+// Signed/float variants reinterpret to unsigned and reuse the above.
+inline bool v_check_all(const v_int8x16& a)
+{ return v_check_all(v_reinterpret_as_u8(a)); }
+inline bool v_check_all(const v_int16x8& a)
+{ return v_check_all(v_reinterpret_as_u16(a)); }
+inline bool v_check_all(const v_int32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+inline bool v_check_all(const v_int64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+
+inline bool v_check_any(const v_int8x16& a)
+{ return v_check_any(v_reinterpret_as_u8(a)); }
+inline bool v_check_any(const v_int16x8& a)
+{ return v_check_any(v_reinterpret_as_u16(a)); }
+inline bool v_check_any(const v_int32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+inline bool v_check_any(const v_int64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+
+// v_select: per-lane merge, lane i = mask[i] ? a[i] : b[i].  The mask vector
+// is reinterpreted as a vbool; note vmerge takes (mask, false-src, true-src),
+// hence b before a.
+#define OPENCV_HAL_IMPL_RISCVV_SELECT(_Tpvec, suffix, _Tpvec2, num) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(vmerge_vvm_##suffix(_Tpvec2(mask.val), b.val, a.val, num)); \
+}
+
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_int8x16,  i8m1, vbool8_t, 16)
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_int16x8,  i16m1, vbool16_t, 8)
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_int32x4,  i32m1, vbool32_t, 4)
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint8x16, u8m1, vbool8_t, 16)
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint16x8, u16m1, vbool16_t, 8)
+OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint32x4, u32m1, vbool32_t, 4)
+// Float variants merge via the same-width unsigned merge and cast back.
+inline v_float32x4 v_select(const v_float32x4& mask, const v_float32x4& a, const v_float32x4& b)
+{
+    return v_float32x4((vfloat32m1_t)vmerge_vvm_u32m1((vbool32_t)mask.val, (vuint32m1_t)b.val, (vuint32m1_t)a.val, 4));
+}
+inline v_float64x2 v_select(const v_float64x2& mask, const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float64x2((vfloat64m1_t)vmerge_vvm_u64m1((vbool64_t)mask.val, (vuint64m1_t)b.val, (vuint64m1_t)a.val, 2));
+}
+
+// Widening expand: a widening add with a zero vector (vwadd/vwaddu + vmv 0)
+// promotes each lane to the double-width type; the m2 result's two m1 halves
+// are the low/high expanded vectors.
+#define OPENCV_HAL_IMPL_RISCVV_EXPAND(add, _Tpvec, _Tpwvec, _Tp, _Tp1, num1, _Tp2, num2, _T1, _T2) \
+inline void v_expand(const _Tpvec& a, v_##_Tpwvec& b0, v_##_Tpwvec& b1) \
+{ \
+    _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num1), num1);    \
+    b0.val = vget_##_Tp2##m2_##_Tp2##m1(b, 0);  \
+    b1.val = vget_##_Tp2##m2_##_Tp2##m1(b, 1);  \
+} \
+inline v_##_Tpwvec v_expand_low(const _Tpvec& a) \
+{ \
+    _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num2), num2);    \
+    return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 0)); \
+} \
+inline v_##_Tpwvec v_expand_high(const _Tpvec& a) \
+{ \
+    _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num1), num1);    \
+    return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 1)); \
+} \
+inline v_##_Tpwvec v_load_expand(const _Tp* ptr) \
+{ \
+    _T2##_t val = vle##_v_##_Tp1(ptr, num2);    \
+    _T1##_t b = vw##add##_vv_##_Tp2##m2(val, vmv_v_x_##_Tp1(0, num2), num2);    \
+    return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 0)); \
+}
+
+OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint8x16, uint16x8, uchar, u8m1, 16, u16, 8, vuint16m2, vuint8m1)
+OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint16x8, uint32x4, ushort, u16m1, 8, u32, 4, vuint32m2, vuint16m1)
+OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint32x4, uint64x2, uint, u32m1, 4, u64, 2, vuint64m2, vuint32m1)
+OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int8x16, int16x8, schar, i8m1, 16, i16, 8, vint16m2, vint8m1)
+OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int16x8, int32x4, short, i16m1, 8, i32, 4, vint32m2, vint16m1)
+OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int32x4, int64x2, int, i32m1, 4, i64, 2, vint64m2, vint32m1)
+
+// Quad expand: load 4 bytes and widen twice (8 -> 16 -> 32 bits).
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    vuint16m2_t b = vundefined_u16m2();
+    vuint32m2_t c = vundefined_u32m2();
+    vuint8m1_t val = vle_v_u8m1(ptr, 4);    \
+    b = vwaddu_vv_u16m2(val, vmv_v_x_u8m1(0, 4), 4);    \
+    c = vwaddu_vv_u32m2(vget_u16m2_u16m1(b, 0), vmv_v_x_u16m1(0, 4), 4);    \
+    return v_uint32x4(vget_u32m2_u32m1(c, 0));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    vint16m2_t b = vundefined_i16m2();
+    vint32m2_t c = vundefined_i32m2();
+    vint8m1_t val = vle_v_i8m1(ptr, 4);    \
+    b = vwadd_vv_i16m2(val, vmv_v_x_i8m1(0, 4), 4);    \
+    c = vwadd_vv_i32m2(vget_i16m2_i16m1(b, 0), vmv_v_x_i16m1(0, 4), 4);    \
+    return v_int32x4(vget_i32m2_i32m1(c, 0));
+}
+// Interleave index tables for v_zip (element indices packed into 64-bit
+// words, viewed as element vectors): VITL_n interleaves lanes 0..n-1 of the
+// low half (0x0?) with lanes of the high half (0x1? / n+i).
+#define VITL_16 (vuint64m2_t){0x1303120211011000, 0x1707160615051404, 0x1B0B1A0A19091808, 0x1F0F1E0E1D0D1C0C}
+#define VITL_8 (vuint64m2_t){0x0009000100080000, 0x000B0003000A0002, 0x000D0005000C0004, 0x000F0007000E0006}
+#define VITL_4 (vuint64m2_t){0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}
+#define VITL_2 (vuint64m2_t){0, 2, 1, 3}
+// Low/high half index tables (kept for use by related shuffles).
+#define LOW_4  0x0000000100000000, 0x0000000500000004
+#define LOW_8  0x0003000200010000, 0x000B000A00090008
+#define LOW_16 0x0706050403020100, 0x1716151413121110
+#define HIGH_4  0x0000000300000002, 0x0000000700000006
+#define HIGH_8  0x0007000600050004, 0x000F000E000D000C
+#define HIGH_16 0x0F0E0D0C0B0A0908, 0x1F1E1D1C1B1A1918
+// v_zip: place a0/a1 in an m2 group and vrgather with the VITL_* interleave
+// table; the two m1 halves of the result are the interleaved outputs.
+// v_combine_low/high and v_recombine: build half-combinations with masked
+// vslideup (upper half lanes enabled by vmset) and vslidedown by numh.
+#define OPENCV_HAL_IMPL_RISCVV_UNPACKS(_Tpvec, _Tp, _T, _UTp, _UT, num, num2, len, numh) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    v##_Tp##m2_t tmp = vundefined_##_T##m2();\
+    tmp = vset_##_T##m2(tmp, 0, a0.val); \
+    tmp = vset_##_T##m2(tmp, 1, a1.val); \
+    vuint64m2_t mask = VITL_##num;    \
+    tmp = (v##_Tp##m2_t)vrgather_vv_##_T##m2((v##_Tp##m2_t)tmp, (v##_UTp##m2_t)mask, num2);    \
+    b0.val = vget_##_T##m2_##_T##m1(tmp, 0); \
+    b1.val = vget_##_T##m2_##_T##m1(tmp, 1); \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    v##_Tp##m1_t b0 = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a.val, b.val, numh, num);    \
+    return v_##_Tpvec(b0);\
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    v##_Tp##m1_t b0 = vslidedown_vx_##_T##m1(b.val, numh, num);    \
+    v##_Tp##m1_t a0 = vslidedown_vx_##_T##m1(a.val, numh, num);    \
+    v##_Tp##m1_t b1 = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a0, b0, numh, num);    \
+    return v_##_Tpvec(b1);\
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a.val, b.val, numh, num);    \
+    v##_Tp##m1_t b0 = vslidedown_vx_##_T##m1(b.val, numh, num);    \
+    v##_Tp##m1_t a0 = vslidedown_vx_##_T##m1(a.val, numh, num);    \
+    d.val = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a0, b0, numh, num);    \
+}
+
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(uint8x16, uint8, u8, uint8, u8, 16, 32, b8, 8)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(int8x16, int8, i8, uint8, u8, 16, 32, b8, 8)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(uint16x8, uint16, u16, uint16, u16, 8, 16, b16, 4)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(int16x8, int16, i16, uint16, u16, 8, 16, b16, 4)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(uint32x4, uint32, u32, uint32, u32, 4, 8, b32, 2)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(int32x4, int32, i32, uint32, u32, 4, 8, b32, 2)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(float32x4, float32, f32, uint32, u32, 4, 8, b32, 2)
+OPENCV_HAL_IMPL_RISCVV_UNPACKS(float64x2, float64, f64, uint64, u64, 2, 4, b64, 1)
+
+// v_reverse: reverse lane order with a vrgather whose index vector is the
+// descending sequence, packed into 64-bit constants and reinterpreted to the
+// element type.
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    vuint64m1_t mask = (vuint64m1_t){0x08090A0B0C0D0E0F, 0x0001020304050607};
+    return v_uint8x16(vrgather_vv_u8m1(a.val, (vuint8m1_t)mask, 16));
+}
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{
+    vint64m1_t mask = (vint64m1_t){0x08090A0B0C0D0E0F, 0x0001020304050607};
+    return v_int8x16(vrgather_vv_i8m1(a.val, (vuint8m1_t)mask, 16));
+}
+
+// 16-bit: indices {7,6,5,4, 3,2,1,0}.  The second constant is written with
+// 15 hex digits; its value still equals 0x0000000100020003 (leading zero
+// omitted).
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    vuint64m1_t mask = (vuint64m1_t){0x0004000500060007, 0x000000100020003};
+    return v_uint16x8(vrgather_vv_u16m1(a.val, (vuint16m1_t)mask, 8));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{
+    vint64m1_t mask = (vint64m1_t){0x0004000500060007, 0x000000100020003};
+    return v_int16x8(vrgather_vv_i16m1(a.val, (vuint16m1_t)mask, 8));
+}
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    return v_uint32x4(vrgather_vv_u32m1(a.val, (vuint32m1_t){3, 2, 1, 0}, 4));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{
+    return v_int32x4(vrgather_vv_i32m1(a.val, (vuint32m1_t){3, 2, 1, 0}, 4));
+}
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+// 2 lanes: swap via the two-scalar constructor.
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    return v_uint64x2(a.val[1], a.val[0]);
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{
+    return v_int64x2(a.val[1], a.val[0]);
+}
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{
+    return v_float64x2(a.val[1], a.val[0]);
+}
+
+// v_extract<s>: concatenate a:b and take 16 bytes starting at lane s —
+// implemented via v_rotate_right.
+// FIX(review): the template parameter list and the explicit template
+// argument on v_rotate_right had been stripped (angle-bracket loss during
+// extraction); `s` was referenced but never declared.  Restored.
+#define OPENCV_HAL_IMPL_RISCVV_EXTRACT(_Tpvec, suffix, size) \
+template <int s> \
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+{ return v_rotate_right<s>(a, b);}
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint8x16, u8, 0)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int8x16, s8, 0)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint16x8, u16, 1)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int16x8, s16, 1)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint32x4, u32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int32x4, s32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint64x2, u64, 3)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int64x2, s64, 3)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_float32x4, f32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_float64x2, f64, 3)
+
+
+// v_extract_n<i>: return scalar lane i.
+// FIX(review): restored the stripped `template <int i>` parameter list
+// (the body references `i`, which was otherwise undeclared).
+#define OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(_Tpvec, _Tp, suffix) \
+template <int i> inline _Tp v_extract_n(_Tpvec v) { return v.val[i]; }
+
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint32x4, uint, u32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_float64x2, double, f64)
+
+// v_broadcast_element<i>: splat lane i across the whole vector (vrgather
+// with a scalar index).
+// FIX(review): restored the stripped `template <int i>` parameter list
+// (the body references `i`, which was otherwise undeclared).
+#define OPENCV_HAL_IMPL_RISCVV_BROADCAST(_Tpvec, _Tp, num) \
+template <int i> inline _Tpvec v_broadcast_element(_Tpvec v) { return _Tpvec(vrgather_vx_##_Tp##m1(v.val, i, num)); }
+
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint8x16, u8, 16)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int8x16, i8, 16)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint16x8, u16, 8)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int16x8, i16, 8)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint32x4, u32, 4)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int32x4, i32, 4)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint64x2, u64, 2)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int64x2, i64, 2)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_float32x4, f32, 4)
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+ __builtin_riscv_fsrm(0);
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+ __builtin_riscv_fsrm(2);
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+ __builtin_riscv_fsrm(3);
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{
+ __builtin_riscv_fsrm(1);
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+ __builtin_riscv_fsrm(0);
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ //_val = vset_f64m2(_val, 1, a.val);
+ _val = vset_f64m2(_val, 1, vfmv_v_f_f64m1(0, 2));
+ vint32m1_t val = vfncvt_x_f_v_i32m1(_val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+ __builtin_riscv_fsrm(0);
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ _val = vset_f64m2(_val, 1, b.val);
+ vint32m1_t val = vfncvt_x_f_v_i32m1(_val, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+ __builtin_riscv_fsrm(2);
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2);
+
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+ __builtin_riscv_fsrm(3);
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2);
+
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+ __builtin_riscv_fsrm(1);
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2);
+
+ vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 4);
+ vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4);
+ vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4);
+ __builtin_riscv_fsrm(0);
+ return v_int32x4(val);
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_LOAD_DEINTERLEAVED(intrin, _Tpvec, num, _Tp, _T) \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b) \
+{ \
+ v##_Tpvec##m1x2_t ret = intrin##2e_v_##_T##m1x2(ptr, num);\
+ a.val = vget_##_T##m1x2_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x2_##_T##m1(ret, 1); \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, v_##_Tpvec##x##num& c) \
+{ \
+ v##_Tpvec##m1x3_t ret = intrin##3e_v_##_T##m1x3(ptr, num);\
+ a.val = vget_##_T##m1x3_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x3_##_T##m1(ret, 1); \
+ c.val = vget_##_T##m1x3_##_T##m1(ret, 2); \
+}\
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, \
+ v_##_Tpvec##x##num& c, v_##_Tpvec##x##num& d) \
+{ \
+ v##_Tpvec##m1x4_t ret = intrin##4e_v_##_T##m1x4(ptr, num);\
+ a.val = vget_##_T##m1x4_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x4_##_T##m1(ret, 1); \
+ c.val = vget_##_T##m1x4_##_T##m1(ret, 2); \
+ d.val = vget_##_T##m1x4_##_T##m1(ret, 3); \
+} \
+
+#define OPENCV_HAL_IMPL_RISCVV_STORE_INTERLEAVED(intrin, _Tpvec, num, _Tp, _T) \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+ v##_Tpvec##m1x2_t ret = vundefined_##_T##m1x2(); \
+ ret = vset_##_T##m1x2(ret, 0, a.val); \
+ ret = vset_##_T##m1x2(ret, 1, b.val); \
+ intrin##2e_v_##_T##m1x2(ptr, ret, num); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ const v_##_Tpvec##x##num& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+ v##_Tpvec##m1x3_t ret = vundefined_##_T##m1x3(); \
+ ret = vset_##_T##m1x3(ret, 0, a.val); \
+ ret = vset_##_T##m1x3(ret, 1, b.val); \
+ ret = vset_##_T##m1x3(ret, 2, c.val); \
+ intrin##3e_v_##_T##m1x3(ptr, ret, num); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ const v_##_Tpvec##x##num& c, const v_##_Tpvec##x##num& d, \
+ hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
+{ \
+ v##_Tpvec##m1x4_t ret = vundefined_##_T##m1x4(); \
+ ret = vset_##_T##m1x4(ret, 0, a.val); \
+ ret = vset_##_T##m1x4(ret, 1, b.val); \
+ ret = vset_##_T##m1x4(ret, 2, c.val); \
+ ret = vset_##_T##m1x4(ret, 3, d.val); \
+ intrin##4e_v_##_T##m1x4(ptr, ret, num); \
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(_Tpvec, _Tp, num, ld, st, _T) \
+OPENCV_HAL_IMPL_RISCVV_LOAD_DEINTERLEAVED(ld, _Tpvec, num, _Tp, _T) \
+OPENCV_HAL_IMPL_RISCVV_STORE_INTERLEAVED(st, _Tpvec, num, _Tp, _T)
+
+//OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint8, uchar, )
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int8, schar, 16, vlseg, vsseg, i8)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int16, short, 8, vlseg, vsseg, i16)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int32, int, 4, vlseg, vsseg, i32)
+
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint8, unsigned char, 16, vlseg, vsseg, u8)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint16, unsigned short, 8, vlseg, vsseg, u16)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint32, unsigned int, 4, vlseg, vsseg, u32)
+
+#define OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(_Tpvec, _Tp, num, _T) \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b) \
+{ \
+ v##_Tpvec##m1x2_t ret = vlseg2e_v_##_T##m1x2(ptr, num); \
+ a.val = vget_##_T##m1x2_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x2_##_T##m1(ret, 1); \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, v_##_Tpvec##x##num& c) \
+{ \
+ v##_Tpvec##m1x3_t ret = vlseg3e_v_##_T##m1x3(ptr, num); \
+ a.val = vget_##_T##m1x3_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x3_##_T##m1(ret, 1); \
+ c.val = vget_##_T##m1x3_##_T##m1(ret, 2); \
+}\
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, \
+ v_##_Tpvec##x##num& c, v_##_Tpvec##x##num& d) \
+{ \
+ v##_Tpvec##m1x4_t ret = vlseg4e_v_##_T##m1x4(ptr, num); \
+ a.val = vget_##_T##m1x4_##_T##m1(ret, 0); \
+ b.val = vget_##_T##m1x4_##_T##m1(ret, 1); \
+ c.val = vget_##_T##m1x4_##_T##m1(ret, 2); \
+ d.val = vget_##_T##m1x4_##_T##m1(ret, 3); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+ v##_Tpvec##m1x2_t ret = vundefined_##_T##m1x2(); \
+ ret = vset_##_T##m1x2(ret, 0, a.val); \
+ ret = vset_##_T##m1x2(ret, 1, b.val); \
+ vsseg2e_v_##_T##m1x2(ptr, ret, num); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ const v_##_Tpvec##x##num& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+ v##_Tpvec##m1x3_t ret = vundefined_##_T##m1x3(); \
+ ret = vset_##_T##m1x3(ret, 0, a.val); \
+ ret = vset_##_T##m1x3(ret, 1, b.val); \
+ ret = vset_##_T##m1x3(ret, 2, c.val); \
+ vsseg3e_v_##_T##m1x3(ptr, ret, num); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \
+ const v_##_Tpvec##x##num& c, const v_##_Tpvec##x##num& d, \
+ hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
+{ \
+ v##_Tpvec##m1x4_t ret = vundefined_##_T##m1x4(); \
+ ret = vset_##_T##m1x4(ret, 0, a.val); \
+ ret = vset_##_T##m1x4(ret, 1, b.val); \
+ ret = vset_##_T##m1x4(ret, 2, c.val); \
+ ret = vset_##_T##m1x4(ret, 3, d.val); \
+ vsseg4e_v_##_T##m1x4(ptr, ret, num); \
+}
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(float32, float, 4, f32)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(float64, double, 2, f64)
+
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(uint64, unsigned long, 2, u64)
+OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(int64, long, 2, i64)
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+ return v_float32x4(vfcvt_f_x_v_f32m1(a.val, 4));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2);
+ return v_float32x4(aval);
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
+{
+ vfloat64m2_t _val = vundefined_f64m2();
+ _val = vset_f64m2(_val, 0, a.val);
+ _val = vset_f64m2(_val, 1, b.val);
+ vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 4);
+ return v_float32x4(aval);
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+ vfloat32m1_t val = vfcvt_f_x_v_f32m1(a.val, 4);
+ vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(val, 4);
+ return v_float64x2(vget_f64m2_f64m1(_val, 0));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+ vfloat32m1_t val = vfcvt_f_x_v_f32m1(a.val, 4);
+ vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(val, 4);
+ return v_float64x2(vget_f64m2_f64m1(_val, 1));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+ vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(a.val, 4);
+ return v_float64x2(vget_f64m2_f64m1(_val, 0));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+ vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(a.val, 4);
+ return v_float64x2(vget_f64m2_f64m1(_val, 1));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)
+{
+ return v_float64x2(vfcvt_f_x_v_f64m1(a.val, 2));
+}
+
+#endif
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
+{
+ vuint64m1_t m0 = {0x0705060403010200, 0x0F0D0E0C0B090A08};
+ return v_int8x16(vrgather_vv_i8m1(vec.val, (vuint8m1_t)m0, 16));
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec)
+{
+ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec)));
+}
+
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
+{
+ vuint64m1_t m0 = {0x0703060205010400, 0x0F0B0E0A0D090C08};
+ return v_int8x16(vrgather_vv_i8m1(vec.val, (vuint8m1_t)m0, 16));
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec)
+{
+ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec)));
+}
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
+{
+ vuint64m1_t m0 = {0x0706030205040100, 0x0F0E0B0A0D0C0908};
+ return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vec.val, (vuint8m1_t)m0, 16));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
+{
+ vuint64m1_t m0 = {0x0B0A030209080100, 0x0F0E07060D0C0504};
+ return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
+{
+ vuint64m1_t m0 = {0x0B0A090803020100, 0x0F0E0D0C07060504};
+ return v_int32x4((vint32m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16));
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
+{
+ vuint64m1_t m0 = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
+ return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16));
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{
+ vuint64m1_t m0 = {0x0908050403020100, 0xFFFFFFFF0D0C0B0A};
+ return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b,
+ const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{
+ vint64m2_t v1 = vwmul_vv_i64m2(a.val, b.val, 4);
+ vfloat64m1_t res = vfcvt_f_x_v_f64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2), 2);
+ return v_float64x2(res);
+}
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ v_float64x2 res = v_dotprod_expand_fast(a, b);
+ return res + c; }
+#endif
+////// FP16 support ///////
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+ vfloat16m1_t v = vle_v_f16m1((__fp16*)ptr, 4);
+ vfloat32m2_t v32 = vfwcvt_f_f_v_f32m2(v, 4);
+ return v_float32x4(vget_f32m2_f32m1(v32, 0));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+ vfloat32m2_t v32 = vundefined_f32m2();
+ v32 = vset_f32m2(v32, 0, v.val);
+ vfloat16m1_t hv = vfncvt_f_f_v_f16m1(v32, 4);
+ vse_v_f16m1((__fp16*)ptr, hv, 4);
+}
+
+
+inline void v_cleanup() {}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+#endif
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index eeb83c074475..6768be76834b 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -2451,7 +2451,8 @@ class CV_EXPORTS UMat
//! <0 - a diagonal from the lower half)
UMat diag(int d=0) const;
//! constructs a square diagonal matrix which main diagonal is vector "d"
- static UMat diag(const UMat& d);
+ static UMat diag(const UMat& d, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat diag(const UMat& d) { return diag(d, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
//! returns deep copy of the matrix, i.e. the data is copied
UMat clone() const CV_NODISCARD;
@@ -2485,14 +2486,22 @@ class CV_EXPORTS UMat
double dot(InputArray m) const;
//! Matlab-style matrix initialization
- static UMat zeros(int rows, int cols, int type);
- static UMat zeros(Size size, int type);
- static UMat zeros(int ndims, const int* sz, int type);
- static UMat ones(int rows, int cols, int type);
- static UMat ones(Size size, int type);
- static UMat ones(int ndims, const int* sz, int type);
- static UMat eye(int rows, int cols, int type);
- static UMat eye(Size size, int type);
+ static UMat zeros(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat zeros(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat zeros(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat zeros(int rows, int cols, int type) { return zeros(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat zeros(Size size, int type) { return zeros(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat zeros(int ndims, const int* sz, int type) { return zeros(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat ones(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat ones(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat ones(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat ones(int rows, int cols, int type) { return ones(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat ones(Size size, int type) { return ones(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat ones(int ndims, const int* sz, int type) { return ones(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat eye(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat eye(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/);
+ static UMat eye(int rows, int cols, int type) { return eye(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
+ static UMat eye(Size size, int type) { return eye(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload
//! allocates new matrix data unless the matrix already has specified size and type.
// previous data is unreferenced if needed.
@@ -2572,27 +2581,38 @@ class CV_EXPORTS UMat
- number of channels
*/
int flags;
+
//! the matrix dimensionality, >= 2
int dims;
- //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
- int rows, cols;
+
+ //! number of rows in the matrix; -1 when the matrix has more than 2 dimensions
+ int rows;
+
+ //! number of columns in the matrix; -1 when the matrix has more than 2 dimensions
+ int cols;
//! custom allocator
MatAllocator* allocator;
- UMatUsageFlags usageFlags; // usage flags for allocator
+
+ //! usage flags for allocator; recommend do not set directly, instead set during construct/create/getUMat
+ UMatUsageFlags usageFlags;
+
//! and the standard allocator
static MatAllocator* getStdAllocator();
//! internal use method: updates the continuity flag
void updateContinuityFlag();
- // black-box container of UMat data
+ //! black-box container of UMat data
UMatData* u;
- // offset of the submatrix (or 0)
+ //! offset of the submatrix (or 0)
size_t offset;
+ //! dimensional size of the matrix; accessible in various formats
MatSize size;
+
+ //! number of bytes each matrix element/row/plane/dimension occupies
MatStep step;
protected:
diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp
index 5e5c846ad059..3ead76e5c46e 100644
--- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp
+++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp
@@ -144,6 +144,10 @@ static void dumpOpenCLInformation()
DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);
+ const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No";
+ DUMP_MESSAGE_STDOUT(" Half support = " << halfSupportStr);
+ DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0);
+
const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
@@ -191,6 +195,9 @@ static void dumpOpenCLInformation()
DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
+
+ DUMP_MESSAGE_STDOUT(" Preferred vector width half = " << device.preferredVectorWidthHalf());
+ DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf());
}
catch (...)
{
diff --git a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp
deleted file mode 100644
index 65c84935240d..000000000000
--- a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp
+++ /dev/null
@@ -1,714 +0,0 @@
-//
-// AUTOGENERATED, DO NOT EDIT
-//
-#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
-#error "Invalid usage"
-#endif
-
-// generated by parser_clamdblas.py
-#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_
-#define clAmdBlasCaxpy clAmdBlasCaxpy_
-#define clAmdBlasCcopy clAmdBlasCcopy_
-#define clAmdBlasCdotc clAmdBlasCdotc_
-#define clAmdBlasCdotu clAmdBlasCdotu_
-#define clAmdBlasCgbmv clAmdBlasCgbmv_
-#define clAmdBlasCgemm clAmdBlasCgemm_
-#define clAmdBlasCgemmEx clAmdBlasCgemmEx_
-#define clAmdBlasCgemv clAmdBlasCgemv_
-#define clAmdBlasCgemvEx clAmdBlasCgemvEx_
-#define clAmdBlasCgerc clAmdBlasCgerc_
-#define clAmdBlasCgeru clAmdBlasCgeru_
-#define clAmdBlasChbmv clAmdBlasChbmv_
-#define clAmdBlasChemm clAmdBlasChemm_
-#define clAmdBlasChemv clAmdBlasChemv_
-#define clAmdBlasCher clAmdBlasCher_
-#define clAmdBlasCher2 clAmdBlasCher2_
-#define clAmdBlasCher2k clAmdBlasCher2k_
-#define clAmdBlasCherk clAmdBlasCherk_
-#define clAmdBlasChpmv clAmdBlasChpmv_
-#define clAmdBlasChpr clAmdBlasChpr_
-#define clAmdBlasChpr2 clAmdBlasChpr2_
-#define clAmdBlasCrotg clAmdBlasCrotg_
-#define clAmdBlasCscal clAmdBlasCscal_
-#define clAmdBlasCsrot clAmdBlasCsrot_
-#define clAmdBlasCsscal clAmdBlasCsscal_
-#define clAmdBlasCswap clAmdBlasCswap_
-#define clAmdBlasCsymm clAmdBlasCsymm_
-#define clAmdBlasCsyr2k clAmdBlasCsyr2k_
-#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_
-#define clAmdBlasCsyrk clAmdBlasCsyrk_
-#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_
-#define clAmdBlasCtbmv clAmdBlasCtbmv_
-#define clAmdBlasCtbsv clAmdBlasCtbsv_
-#define clAmdBlasCtpmv clAmdBlasCtpmv_
-#define clAmdBlasCtpsv clAmdBlasCtpsv_
-#define clAmdBlasCtrmm clAmdBlasCtrmm_
-#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_
-#define clAmdBlasCtrmv clAmdBlasCtrmv_
-#define clAmdBlasCtrsm clAmdBlasCtrsm_
-#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_
-#define clAmdBlasCtrsv clAmdBlasCtrsv_
-#define clAmdBlasDasum clAmdBlasDasum_
-#define clAmdBlasDaxpy clAmdBlasDaxpy_
-#define clAmdBlasDcopy clAmdBlasDcopy_
-#define clAmdBlasDdot clAmdBlasDdot_
-#define clAmdBlasDgbmv clAmdBlasDgbmv_
-#define clAmdBlasDgemm clAmdBlasDgemm_
-#define clAmdBlasDgemmEx clAmdBlasDgemmEx_
-#define clAmdBlasDgemv clAmdBlasDgemv_
-#define clAmdBlasDgemvEx clAmdBlasDgemvEx_
-#define clAmdBlasDger clAmdBlasDger_
-#define clAmdBlasDnrm2 clAmdBlasDnrm2_
-#define clAmdBlasDrot clAmdBlasDrot_
-#define clAmdBlasDrotg clAmdBlasDrotg_
-#define clAmdBlasDrotm clAmdBlasDrotm_
-#define clAmdBlasDrotmg clAmdBlasDrotmg_
-#define clAmdBlasDsbmv clAmdBlasDsbmv_
-#define clAmdBlasDscal clAmdBlasDscal_
-#define clAmdBlasDspmv clAmdBlasDspmv_
-#define clAmdBlasDspr clAmdBlasDspr_
-#define clAmdBlasDspr2 clAmdBlasDspr2_
-#define clAmdBlasDswap clAmdBlasDswap_
-#define clAmdBlasDsymm clAmdBlasDsymm_
-#define clAmdBlasDsymv clAmdBlasDsymv_
-#define clAmdBlasDsymvEx clAmdBlasDsymvEx_
-#define clAmdBlasDsyr clAmdBlasDsyr_
-#define clAmdBlasDsyr2 clAmdBlasDsyr2_
-#define clAmdBlasDsyr2k clAmdBlasDsyr2k_
-#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_
-#define clAmdBlasDsyrk clAmdBlasDsyrk_
-#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_
-#define clAmdBlasDtbmv clAmdBlasDtbmv_
-#define clAmdBlasDtbsv clAmdBlasDtbsv_
-#define clAmdBlasDtpmv clAmdBlasDtpmv_
-#define clAmdBlasDtpsv clAmdBlasDtpsv_
-#define clAmdBlasDtrmm clAmdBlasDtrmm_
-#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_
-#define clAmdBlasDtrmv clAmdBlasDtrmv_
-#define clAmdBlasDtrsm clAmdBlasDtrsm_
-#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_
-#define clAmdBlasDtrsv clAmdBlasDtrsv_
-#define clAmdBlasDzasum clAmdBlasDzasum_
-#define clAmdBlasDznrm2 clAmdBlasDznrm2_
-#define clAmdBlasGetVersion clAmdBlasGetVersion_
-#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_
-#define clAmdBlasSasum clAmdBlasSasum_
-#define clAmdBlasSaxpy clAmdBlasSaxpy_
-#define clAmdBlasScasum clAmdBlasScasum_
-#define clAmdBlasScnrm2 clAmdBlasScnrm2_
-#define clAmdBlasScopy clAmdBlasScopy_
-#define clAmdBlasSdot clAmdBlasSdot_
-#define clAmdBlasSetup clAmdBlasSetup_
-#define clAmdBlasSgbmv clAmdBlasSgbmv_
-#define clAmdBlasSgemm clAmdBlasSgemm_
-#define clAmdBlasSgemmEx clAmdBlasSgemmEx_
-#define clAmdBlasSgemv clAmdBlasSgemv_
-#define clAmdBlasSgemvEx clAmdBlasSgemvEx_
-#define clAmdBlasSger clAmdBlasSger_
-#define clAmdBlasSnrm2 clAmdBlasSnrm2_
-#define clAmdBlasSrot clAmdBlasSrot_
-#define clAmdBlasSrotg clAmdBlasSrotg_
-#define clAmdBlasSrotm clAmdBlasSrotm_
-#define clAmdBlasSrotmg clAmdBlasSrotmg_
-#define clAmdBlasSsbmv clAmdBlasSsbmv_
-#define clAmdBlasSscal clAmdBlasSscal_
-#define clAmdBlasSspmv clAmdBlasSspmv_
-#define clAmdBlasSspr clAmdBlasSspr_
-#define clAmdBlasSspr2 clAmdBlasSspr2_
-#define clAmdBlasSswap clAmdBlasSswap_
-#define clAmdBlasSsymm clAmdBlasSsymm_
-#define clAmdBlasSsymv clAmdBlasSsymv_
-#define clAmdBlasSsymvEx clAmdBlasSsymvEx_
-#define clAmdBlasSsyr clAmdBlasSsyr_
-#define clAmdBlasSsyr2 clAmdBlasSsyr2_
-#define clAmdBlasSsyr2k clAmdBlasSsyr2k_
-#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_
-#define clAmdBlasSsyrk clAmdBlasSsyrk_
-#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_
-#define clAmdBlasStbmv clAmdBlasStbmv_
-#define clAmdBlasStbsv clAmdBlasStbsv_
-#define clAmdBlasStpmv clAmdBlasStpmv_
-#define clAmdBlasStpsv clAmdBlasStpsv_
-#define clAmdBlasStrmm clAmdBlasStrmm_
-#define clAmdBlasStrmmEx clAmdBlasStrmmEx_
-#define clAmdBlasStrmv clAmdBlasStrmv_
-#define clAmdBlasStrsm clAmdBlasStrsm_
-#define clAmdBlasStrsmEx clAmdBlasStrsmEx_
-#define clAmdBlasStrsv clAmdBlasStrsv_
-#define clAmdBlasTeardown clAmdBlasTeardown_
-#define clAmdBlasZaxpy clAmdBlasZaxpy_
-#define clAmdBlasZcopy clAmdBlasZcopy_
-#define clAmdBlasZdotc clAmdBlasZdotc_
-#define clAmdBlasZdotu clAmdBlasZdotu_
-#define clAmdBlasZdrot clAmdBlasZdrot_
-#define clAmdBlasZdscal clAmdBlasZdscal_
-#define clAmdBlasZgbmv clAmdBlasZgbmv_
-#define clAmdBlasZgemm clAmdBlasZgemm_
-#define clAmdBlasZgemmEx clAmdBlasZgemmEx_
-#define clAmdBlasZgemv clAmdBlasZgemv_
-#define clAmdBlasZgemvEx clAmdBlasZgemvEx_
-#define clAmdBlasZgerc clAmdBlasZgerc_
-#define clAmdBlasZgeru clAmdBlasZgeru_
-#define clAmdBlasZhbmv clAmdBlasZhbmv_
-#define clAmdBlasZhemm clAmdBlasZhemm_
-#define clAmdBlasZhemv clAmdBlasZhemv_
-#define clAmdBlasZher clAmdBlasZher_
-#define clAmdBlasZher2 clAmdBlasZher2_
-#define clAmdBlasZher2k clAmdBlasZher2k_
-#define clAmdBlasZherk clAmdBlasZherk_
-#define clAmdBlasZhpmv clAmdBlasZhpmv_
-#define clAmdBlasZhpr clAmdBlasZhpr_
-#define clAmdBlasZhpr2 clAmdBlasZhpr2_
-#define clAmdBlasZrotg clAmdBlasZrotg_
-#define clAmdBlasZscal clAmdBlasZscal_
-#define clAmdBlasZswap clAmdBlasZswap_
-#define clAmdBlasZsymm clAmdBlasZsymm_
-#define clAmdBlasZsyr2k clAmdBlasZsyr2k_
-#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_
-#define clAmdBlasZsyrk clAmdBlasZsyrk_
-#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_
-#define clAmdBlasZtbmv clAmdBlasZtbmv_
-#define clAmdBlasZtbsv clAmdBlasZtbsv_
-#define clAmdBlasZtpmv clAmdBlasZtpmv_
-#define clAmdBlasZtpsv clAmdBlasZtpsv_
-#define clAmdBlasZtrmm clAmdBlasZtrmm_
-#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_
-#define clAmdBlasZtrmv clAmdBlasZtrmv_
-#define clAmdBlasZtrsm clAmdBlasZtrsm_
-#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_
-#define clAmdBlasZtrsv clAmdBlasZtrsv_
-#define clAmdBlasiCamax clAmdBlasiCamax_
-#define clAmdBlasiDamax clAmdBlasiDamax_
-#define clAmdBlasiSamax clAmdBlasiSamax_
-#define clAmdBlasiZamax clAmdBlasiZamax_
-
-#include <clAmdBlas.h>
-
-// generated by parser_clamdblas.py
-#undef clAmdBlasAddScratchImage
-//#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn
-#undef clAmdBlasCaxpy
-//#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn
-#undef clAmdBlasCcopy
-//#define clAmdBlasCcopy clAmdBlasCcopy_pfn
-#undef clAmdBlasCdotc
-//#define clAmdBlasCdotc clAmdBlasCdotc_pfn
-#undef clAmdBlasCdotu
-//#define clAmdBlasCdotu clAmdBlasCdotu_pfn
-#undef clAmdBlasCgbmv
-//#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn
-#undef clAmdBlasCgemm
-//#define clAmdBlasCgemm clAmdBlasCgemm_pfn
-#undef clAmdBlasCgemmEx
-#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn
-#undef clAmdBlasCgemv
-//#define clAmdBlasCgemv clAmdBlasCgemv_pfn
-#undef clAmdBlasCgemvEx
-//#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn
-#undef clAmdBlasCgerc
-//#define clAmdBlasCgerc clAmdBlasCgerc_pfn
-#undef clAmdBlasCgeru
-//#define clAmdBlasCgeru clAmdBlasCgeru_pfn
-#undef clAmdBlasChbmv
-//#define clAmdBlasChbmv clAmdBlasChbmv_pfn
-#undef clAmdBlasChemm
-//#define clAmdBlasChemm clAmdBlasChemm_pfn
-#undef clAmdBlasChemv
-//#define clAmdBlasChemv clAmdBlasChemv_pfn
-#undef clAmdBlasCher
-//#define clAmdBlasCher clAmdBlasCher_pfn
-#undef clAmdBlasCher2
-//#define clAmdBlasCher2 clAmdBlasCher2_pfn
-#undef clAmdBlasCher2k
-//#define clAmdBlasCher2k clAmdBlasCher2k_pfn
-#undef clAmdBlasCherk
-//#define clAmdBlasCherk clAmdBlasCherk_pfn
-#undef clAmdBlasChpmv
-//#define clAmdBlasChpmv clAmdBlasChpmv_pfn
-#undef clAmdBlasChpr
-//#define clAmdBlasChpr clAmdBlasChpr_pfn
-#undef clAmdBlasChpr2
-//#define clAmdBlasChpr2 clAmdBlasChpr2_pfn
-#undef clAmdBlasCrotg
-//#define clAmdBlasCrotg clAmdBlasCrotg_pfn
-#undef clAmdBlasCscal
-//#define clAmdBlasCscal clAmdBlasCscal_pfn
-#undef clAmdBlasCsrot
-//#define clAmdBlasCsrot clAmdBlasCsrot_pfn
-#undef clAmdBlasCsscal
-//#define clAmdBlasCsscal clAmdBlasCsscal_pfn
-#undef clAmdBlasCswap
-//#define clAmdBlasCswap clAmdBlasCswap_pfn
-#undef clAmdBlasCsymm
-//#define clAmdBlasCsymm clAmdBlasCsymm_pfn
-#undef clAmdBlasCsyr2k
-//#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn
-#undef clAmdBlasCsyr2kEx
-//#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn
-#undef clAmdBlasCsyrk
-//#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn
-#undef clAmdBlasCsyrkEx
-//#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn
-#undef clAmdBlasCtbmv
-//#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn
-#undef clAmdBlasCtbsv
-//#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn
-#undef clAmdBlasCtpmv
-//#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn
-#undef clAmdBlasCtpsv
-//#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn
-#undef clAmdBlasCtrmm
-//#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn
-#undef clAmdBlasCtrmmEx
-//#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn
-#undef clAmdBlasCtrmv
-//#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn
-#undef clAmdBlasCtrsm
-//#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn
-#undef clAmdBlasCtrsmEx
-//#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn
-#undef clAmdBlasCtrsv
-//#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn
-#undef clAmdBlasDasum
-//#define clAmdBlasDasum clAmdBlasDasum_pfn
-#undef clAmdBlasDaxpy
-//#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn
-#undef clAmdBlasDcopy
-//#define clAmdBlasDcopy clAmdBlasDcopy_pfn
-#undef clAmdBlasDdot
-//#define clAmdBlasDdot clAmdBlasDdot_pfn
-#undef clAmdBlasDgbmv
-//#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn
-#undef clAmdBlasDgemm
-//#define clAmdBlasDgemm clAmdBlasDgemm_pfn
-#undef clAmdBlasDgemmEx
-#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn
-#undef clAmdBlasDgemv
-//#define clAmdBlasDgemv clAmdBlasDgemv_pfn
-#undef clAmdBlasDgemvEx
-//#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn
-#undef clAmdBlasDger
-//#define clAmdBlasDger clAmdBlasDger_pfn
-#undef clAmdBlasDnrm2
-//#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn
-#undef clAmdBlasDrot
-//#define clAmdBlasDrot clAmdBlasDrot_pfn
-#undef clAmdBlasDrotg
-//#define clAmdBlasDrotg clAmdBlasDrotg_pfn
-#undef clAmdBlasDrotm
-//#define clAmdBlasDrotm clAmdBlasDrotm_pfn
-#undef clAmdBlasDrotmg
-//#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn
-#undef clAmdBlasDsbmv
-//#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn
-#undef clAmdBlasDscal
-//#define clAmdBlasDscal clAmdBlasDscal_pfn
-#undef clAmdBlasDspmv
-//#define clAmdBlasDspmv clAmdBlasDspmv_pfn
-#undef clAmdBlasDspr
-//#define clAmdBlasDspr clAmdBlasDspr_pfn
-#undef clAmdBlasDspr2
-//#define clAmdBlasDspr2 clAmdBlasDspr2_pfn
-#undef clAmdBlasDswap
-//#define clAmdBlasDswap clAmdBlasDswap_pfn
-#undef clAmdBlasDsymm
-//#define clAmdBlasDsymm clAmdBlasDsymm_pfn
-#undef clAmdBlasDsymv
-//#define clAmdBlasDsymv clAmdBlasDsymv_pfn
-#undef clAmdBlasDsymvEx
-//#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn
-#undef clAmdBlasDsyr
-//#define clAmdBlasDsyr clAmdBlasDsyr_pfn
-#undef clAmdBlasDsyr2
-//#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn
-#undef clAmdBlasDsyr2k
-//#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn
-#undef clAmdBlasDsyr2kEx
-//#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn
-#undef clAmdBlasDsyrk
-//#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn
-#undef clAmdBlasDsyrkEx
-//#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn
-#undef clAmdBlasDtbmv
-//#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn
-#undef clAmdBlasDtbsv
-//#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn
-#undef clAmdBlasDtpmv
-//#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn
-#undef clAmdBlasDtpsv
-//#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn
-#undef clAmdBlasDtrmm
-//#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn
-#undef clAmdBlasDtrmmEx
-//#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn
-#undef clAmdBlasDtrmv
-//#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn
-#undef clAmdBlasDtrsm
-//#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn
-#undef clAmdBlasDtrsmEx
-//#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn
-#undef clAmdBlasDtrsv
-//#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn
-#undef clAmdBlasDzasum
-//#define clAmdBlasDzasum clAmdBlasDzasum_pfn
-#undef clAmdBlasDznrm2
-//#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn
-#undef clAmdBlasGetVersion
-//#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn
-#undef clAmdBlasRemoveScratchImage
-//#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn
-#undef clAmdBlasSasum
-//#define clAmdBlasSasum clAmdBlasSasum_pfn
-#undef clAmdBlasSaxpy
-//#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn
-#undef clAmdBlasScasum
-//#define clAmdBlasScasum clAmdBlasScasum_pfn
-#undef clAmdBlasScnrm2
-//#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn
-#undef clAmdBlasScopy
-//#define clAmdBlasScopy clAmdBlasScopy_pfn
-#undef clAmdBlasSdot
-//#define clAmdBlasSdot clAmdBlasSdot_pfn
-#undef clAmdBlasSetup
-#define clAmdBlasSetup clAmdBlasSetup_pfn
-#undef clAmdBlasSgbmv
-//#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn
-#undef clAmdBlasSgemm
-//#define clAmdBlasSgemm clAmdBlasSgemm_pfn
-#undef clAmdBlasSgemmEx
-#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn
-#undef clAmdBlasSgemv
-//#define clAmdBlasSgemv clAmdBlasSgemv_pfn
-#undef clAmdBlasSgemvEx
-//#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn
-#undef clAmdBlasSger
-//#define clAmdBlasSger clAmdBlasSger_pfn
-#undef clAmdBlasSnrm2
-//#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn
-#undef clAmdBlasSrot
-//#define clAmdBlasSrot clAmdBlasSrot_pfn
-#undef clAmdBlasSrotg
-//#define clAmdBlasSrotg clAmdBlasSrotg_pfn
-#undef clAmdBlasSrotm
-//#define clAmdBlasSrotm clAmdBlasSrotm_pfn
-#undef clAmdBlasSrotmg
-//#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn
-#undef clAmdBlasSsbmv
-//#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn
-#undef clAmdBlasSscal
-//#define clAmdBlasSscal clAmdBlasSscal_pfn
-#undef clAmdBlasSspmv
-//#define clAmdBlasSspmv clAmdBlasSspmv_pfn
-#undef clAmdBlasSspr
-//#define clAmdBlasSspr clAmdBlasSspr_pfn
-#undef clAmdBlasSspr2
-//#define clAmdBlasSspr2 clAmdBlasSspr2_pfn
-#undef clAmdBlasSswap
-//#define clAmdBlasSswap clAmdBlasSswap_pfn
-#undef clAmdBlasSsymm
-//#define clAmdBlasSsymm clAmdBlasSsymm_pfn
-#undef clAmdBlasSsymv
-//#define clAmdBlasSsymv clAmdBlasSsymv_pfn
-#undef clAmdBlasSsymvEx
-//#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn
-#undef clAmdBlasSsyr
-//#define clAmdBlasSsyr clAmdBlasSsyr_pfn
-#undef clAmdBlasSsyr2
-//#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn
-#undef clAmdBlasSsyr2k
-//#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn
-#undef clAmdBlasSsyr2kEx
-//#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn
-#undef clAmdBlasSsyrk
-//#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn
-#undef clAmdBlasSsyrkEx
-//#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn
-#undef clAmdBlasStbmv
-//#define clAmdBlasStbmv clAmdBlasStbmv_pfn
-#undef clAmdBlasStbsv
-//#define clAmdBlasStbsv clAmdBlasStbsv_pfn
-#undef clAmdBlasStpmv
-//#define clAmdBlasStpmv clAmdBlasStpmv_pfn
-#undef clAmdBlasStpsv
-//#define clAmdBlasStpsv clAmdBlasStpsv_pfn
-#undef clAmdBlasStrmm
-//#define clAmdBlasStrmm clAmdBlasStrmm_pfn
-#undef clAmdBlasStrmmEx
-//#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn
-#undef clAmdBlasStrmv
-//#define clAmdBlasStrmv clAmdBlasStrmv_pfn
-#undef clAmdBlasStrsm
-//#define clAmdBlasStrsm clAmdBlasStrsm_pfn
-#undef clAmdBlasStrsmEx
-//#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn
-#undef clAmdBlasStrsv
-//#define clAmdBlasStrsv clAmdBlasStrsv_pfn
-#undef clAmdBlasTeardown
-#define clAmdBlasTeardown clAmdBlasTeardown_pfn
-#undef clAmdBlasZaxpy
-//#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn
-#undef clAmdBlasZcopy
-//#define clAmdBlasZcopy clAmdBlasZcopy_pfn
-#undef clAmdBlasZdotc
-//#define clAmdBlasZdotc clAmdBlasZdotc_pfn
-#undef clAmdBlasZdotu
-//#define clAmdBlasZdotu clAmdBlasZdotu_pfn
-#undef clAmdBlasZdrot
-//#define clAmdBlasZdrot clAmdBlasZdrot_pfn
-#undef clAmdBlasZdscal
-//#define clAmdBlasZdscal clAmdBlasZdscal_pfn
-#undef clAmdBlasZgbmv
-//#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn
-#undef clAmdBlasZgemm
-//#define clAmdBlasZgemm clAmdBlasZgemm_pfn
-#undef clAmdBlasZgemmEx
-#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn
-#undef clAmdBlasZgemv
-//#define clAmdBlasZgemv clAmdBlasZgemv_pfn
-#undef clAmdBlasZgemvEx
-//#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn
-#undef clAmdBlasZgerc
-//#define clAmdBlasZgerc clAmdBlasZgerc_pfn
-#undef clAmdBlasZgeru
-//#define clAmdBlasZgeru clAmdBlasZgeru_pfn
-#undef clAmdBlasZhbmv
-//#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn
-#undef clAmdBlasZhemm
-//#define clAmdBlasZhemm clAmdBlasZhemm_pfn
-#undef clAmdBlasZhemv
-//#define clAmdBlasZhemv clAmdBlasZhemv_pfn
-#undef clAmdBlasZher
-//#define clAmdBlasZher clAmdBlasZher_pfn
-#undef clAmdBlasZher2
-//#define clAmdBlasZher2 clAmdBlasZher2_pfn
-#undef clAmdBlasZher2k
-//#define clAmdBlasZher2k clAmdBlasZher2k_pfn
-#undef clAmdBlasZherk
-//#define clAmdBlasZherk clAmdBlasZherk_pfn
-#undef clAmdBlasZhpmv
-//#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn
-#undef clAmdBlasZhpr
-//#define clAmdBlasZhpr clAmdBlasZhpr_pfn
-#undef clAmdBlasZhpr2
-//#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn
-#undef clAmdBlasZrotg
-//#define clAmdBlasZrotg clAmdBlasZrotg_pfn
-#undef clAmdBlasZscal
-//#define clAmdBlasZscal clAmdBlasZscal_pfn
-#undef clAmdBlasZswap
-//#define clAmdBlasZswap clAmdBlasZswap_pfn
-#undef clAmdBlasZsymm
-//#define clAmdBlasZsymm clAmdBlasZsymm_pfn
-#undef clAmdBlasZsyr2k
-//#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn
-#undef clAmdBlasZsyr2kEx
-//#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn
-#undef clAmdBlasZsyrk
-//#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn
-#undef clAmdBlasZsyrkEx
-//#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn
-#undef clAmdBlasZtbmv
-//#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn
-#undef clAmdBlasZtbsv
-//#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn
-#undef clAmdBlasZtpmv
-//#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn
-#undef clAmdBlasZtpsv
-//#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn
-#undef clAmdBlasZtrmm
-//#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn
-#undef clAmdBlasZtrmmEx
-//#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn
-#undef clAmdBlasZtrmv
-//#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn
-#undef clAmdBlasZtrsm
-//#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn
-#undef clAmdBlasZtrsmEx
-//#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn
-#undef clAmdBlasZtrsv
-//#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn
-#undef clAmdBlasiCamax
-//#define clAmdBlasiCamax clAmdBlasiCamax_pfn
-#undef clAmdBlasiDamax
-//#define clAmdBlasiDamax clAmdBlasiDamax_pfn
-#undef clAmdBlasiSamax
-//#define clAmdBlasiSamax clAmdBlasiSamax_pfn
-#undef clAmdBlasiZamax
-//#define clAmdBlasiZamax clAmdBlasiZamax_pfn
-
-// generated by parser_clamdblas.py
-//extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)();
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)();
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
-//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
diff --git a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp
deleted file mode 100644
index 1457d7eb8d69..000000000000
--- a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//
-// AUTOGENERATED, DO NOT EDIT
-//
-#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
-#error "Invalid usage"
-#endif
-
-// generated by parser_clamdfft.py
-#define clAmdFftBakePlan clAmdFftBakePlan_
-#define clAmdFftCopyPlan clAmdFftCopyPlan_
-#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_
-#define clAmdFftDestroyPlan clAmdFftDestroyPlan_
-#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_
-#define clAmdFftGetLayout clAmdFftGetLayout_
-#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_
-#define clAmdFftGetPlanContext clAmdFftGetPlanContext_
-#define clAmdFftGetPlanDim clAmdFftGetPlanDim_
-#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_
-#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_
-#define clAmdFftGetPlanLength clAmdFftGetPlanLength_
-#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_
-#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_
-#define clAmdFftGetPlanScale clAmdFftGetPlanScale_
-#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_
-#define clAmdFftGetResultLocation clAmdFftGetResultLocation_
-#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_
-#define clAmdFftGetVersion clAmdFftGetVersion_
-#define clAmdFftSetLayout clAmdFftSetLayout_
-#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_
-#define clAmdFftSetPlanDim clAmdFftSetPlanDim_
-#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_
-#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_
-#define clAmdFftSetPlanLength clAmdFftSetPlanLength_
-#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_
-#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_
-#define clAmdFftSetPlanScale clAmdFftSetPlanScale_
-#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_
-#define clAmdFftSetResultLocation clAmdFftSetResultLocation_
-#define clAmdFftSetup clAmdFftSetup_
-#define clAmdFftTeardown clAmdFftTeardown_
-
-#include <clAmdFft.h>
-
-// generated by parser_clamdfft.py
-#undef clAmdFftBakePlan
-#define clAmdFftBakePlan clAmdFftBakePlan_pfn
-#undef clAmdFftCopyPlan
-//#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn
-#undef clAmdFftCreateDefaultPlan
-#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn
-#undef clAmdFftDestroyPlan
-#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn
-#undef clAmdFftEnqueueTransform
-#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn
-#undef clAmdFftGetLayout
-//#define clAmdFftGetLayout clAmdFftGetLayout_pfn
-#undef clAmdFftGetPlanBatchSize
-//#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn
-#undef clAmdFftGetPlanContext
-//#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn
-#undef clAmdFftGetPlanDim
-//#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn
-#undef clAmdFftGetPlanDistance
-//#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn
-#undef clAmdFftGetPlanInStride
-//#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn
-#undef clAmdFftGetPlanLength
-//#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn
-#undef clAmdFftGetPlanOutStride
-//#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn
-#undef clAmdFftGetPlanPrecision
-//#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn
-#undef clAmdFftGetPlanScale
-//#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn
-#undef clAmdFftGetPlanTransposeResult
-//#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn
-#undef clAmdFftGetResultLocation
-//#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn
-#undef clAmdFftGetTmpBufSize
-#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn
-#undef clAmdFftGetVersion
-#define clAmdFftGetVersion clAmdFftGetVersion_pfn
-#undef clAmdFftSetLayout
-#define clAmdFftSetLayout clAmdFftSetLayout_pfn
-#undef clAmdFftSetPlanBatchSize
-#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn
-#undef clAmdFftSetPlanDim
-//#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn
-#undef clAmdFftSetPlanDistance
-#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn
-#undef clAmdFftSetPlanInStride
-#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn
-#undef clAmdFftSetPlanLength
-//#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn
-#undef clAmdFftSetPlanOutStride
-#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
-#undef clAmdFftSetPlanPrecision
-#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
-#undef clAmdFftSetPlanScale
-#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
-#undef clAmdFftSetPlanTransposeResult
-//#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn
-#undef clAmdFftSetResultLocation
-#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn
-#undef clAmdFftSetup
-#define clAmdFftSetup clAmdFftSetup_pfn
-#undef clAmdFftTeardown
-#define clAmdFftTeardown clAmdFftTeardown_pfn
-
-// generated by parser_clamdfft.py
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
-//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData);
-extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)();
diff --git a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp
new file mode 100644
index 000000000000..2749927bea65
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp
@@ -0,0 +1,602 @@
+//
+// AUTOGENERATED, DO NOT EDIT
+//
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_clblas.py
+#define clblasCaxpy clblasCaxpy_
+#define clblasCcopy clblasCcopy_
+#define clblasCdotc clblasCdotc_
+#define clblasCdotu clblasCdotu_
+#define clblasCgbmv clblasCgbmv_
+#define clblasCgemm clblasCgemm_
+#define clblasCgemv clblasCgemv_
+#define clblasCgerc clblasCgerc_
+#define clblasCgeru clblasCgeru_
+#define clblasChbmv clblasChbmv_
+#define clblasChemm clblasChemm_
+#define clblasChemv clblasChemv_
+#define clblasCher clblasCher_
+#define clblasCher2 clblasCher2_
+#define clblasCher2k clblasCher2k_
+#define clblasCherk clblasCherk_
+#define clblasChpmv clblasChpmv_
+#define clblasChpr clblasChpr_
+#define clblasChpr2 clblasChpr2_
+#define clblasCrotg clblasCrotg_
+#define clblasCscal clblasCscal_
+#define clblasCsrot clblasCsrot_
+#define clblasCsscal clblasCsscal_
+#define clblasCswap clblasCswap_
+#define clblasCsymm clblasCsymm_
+#define clblasCsyr2k clblasCsyr2k_
+#define clblasCsyrk clblasCsyrk_
+#define clblasCtbmv clblasCtbmv_
+#define clblasCtbsv clblasCtbsv_
+#define clblasCtpmv clblasCtpmv_
+#define clblasCtpsv clblasCtpsv_
+#define clblasCtrmm clblasCtrmm_
+#define clblasCtrmv clblasCtrmv_
+#define clblasCtrsm clblasCtrsm_
+#define clblasCtrsv clblasCtrsv_
+#define clblasDasum clblasDasum_
+#define clblasDaxpy clblasDaxpy_
+#define clblasDcopy clblasDcopy_
+#define clblasDdot clblasDdot_
+#define clblasDgbmv clblasDgbmv_
+#define clblasDgemm clblasDgemm_
+#define clblasDgemv clblasDgemv_
+#define clblasDger clblasDger_
+#define clblasDnrm2 clblasDnrm2_
+#define clblasDrot clblasDrot_
+#define clblasDrotg clblasDrotg_
+#define clblasDrotm clblasDrotm_
+#define clblasDrotmg clblasDrotmg_
+#define clblasDsbmv clblasDsbmv_
+#define clblasDscal clblasDscal_
+#define clblasDspmv clblasDspmv_
+#define clblasDspr clblasDspr_
+#define clblasDspr2 clblasDspr2_
+#define clblasDswap clblasDswap_
+#define clblasDsymm clblasDsymm_
+#define clblasDsymv clblasDsymv_
+#define clblasDsyr clblasDsyr_
+#define clblasDsyr2 clblasDsyr2_
+#define clblasDsyr2k clblasDsyr2k_
+#define clblasDsyrk clblasDsyrk_
+#define clblasDtbmv clblasDtbmv_
+#define clblasDtbsv clblasDtbsv_
+#define clblasDtpmv clblasDtpmv_
+#define clblasDtpsv clblasDtpsv_
+#define clblasDtrmm clblasDtrmm_
+#define clblasDtrmv clblasDtrmv_
+#define clblasDtrsm clblasDtrsm_
+#define clblasDtrsv clblasDtrsv_
+#define clblasDzasum clblasDzasum_
+#define clblasDznrm2 clblasDznrm2_
+#define clblasGetVersion clblasGetVersion_
+#define clblasSasum clblasSasum_
+#define clblasSaxpy clblasSaxpy_
+#define clblasScasum clblasScasum_
+#define clblasScnrm2 clblasScnrm2_
+#define clblasScopy clblasScopy_
+#define clblasSdot clblasSdot_
+#define clblasSetup clblasSetup_
+#define clblasSgbmv clblasSgbmv_
+#define clblasSgemm clblasSgemm_
+#define clblasSgemv clblasSgemv_
+#define clblasSger clblasSger_
+#define clblasSnrm2 clblasSnrm2_
+#define clblasSrot clblasSrot_
+#define clblasSrotg clblasSrotg_
+#define clblasSrotm clblasSrotm_
+#define clblasSrotmg clblasSrotmg_
+#define clblasSsbmv clblasSsbmv_
+#define clblasSscal clblasSscal_
+#define clblasSspmv clblasSspmv_
+#define clblasSspr clblasSspr_
+#define clblasSspr2 clblasSspr2_
+#define clblasSswap clblasSswap_
+#define clblasSsymm clblasSsymm_
+#define clblasSsymv clblasSsymv_
+#define clblasSsyr clblasSsyr_
+#define clblasSsyr2 clblasSsyr2_
+#define clblasSsyr2k clblasSsyr2k_
+#define clblasSsyrk clblasSsyrk_
+#define clblasStbmv clblasStbmv_
+#define clblasStbsv clblasStbsv_
+#define clblasStpmv clblasStpmv_
+#define clblasStpsv clblasStpsv_
+#define clblasStrmm clblasStrmm_
+#define clblasStrmv clblasStrmv_
+#define clblasStrsm clblasStrsm_
+#define clblasStrsv clblasStrsv_
+#define clblasTeardown clblasTeardown_
+#define clblasZaxpy clblasZaxpy_
+#define clblasZcopy clblasZcopy_
+#define clblasZdotc clblasZdotc_
+#define clblasZdotu clblasZdotu_
+#define clblasZdrot clblasZdrot_
+#define clblasZdscal clblasZdscal_
+#define clblasZgbmv clblasZgbmv_
+#define clblasZgemm clblasZgemm_
+#define clblasZgemv clblasZgemv_
+#define clblasZgerc clblasZgerc_
+#define clblasZgeru clblasZgeru_
+#define clblasZhbmv clblasZhbmv_
+#define clblasZhemm clblasZhemm_
+#define clblasZhemv clblasZhemv_
+#define clblasZher clblasZher_
+#define clblasZher2 clblasZher2_
+#define clblasZher2k clblasZher2k_
+#define clblasZherk clblasZherk_
+#define clblasZhpmv clblasZhpmv_
+#define clblasZhpr clblasZhpr_
+#define clblasZhpr2 clblasZhpr2_
+#define clblasZrotg clblasZrotg_
+#define clblasZscal clblasZscal_
+#define clblasZswap clblasZswap_
+#define clblasZsymm clblasZsymm_
+#define clblasZsyr2k clblasZsyr2k_
+#define clblasZsyrk clblasZsyrk_
+#define clblasZtbmv clblasZtbmv_
+#define clblasZtbsv clblasZtbsv_
+#define clblasZtpmv clblasZtpmv_
+#define clblasZtpsv clblasZtpsv_
+#define clblasZtrmm clblasZtrmm_
+#define clblasZtrmv clblasZtrmv_
+#define clblasZtrsm clblasZtrsm_
+#define clblasZtrsv clblasZtrsv_
+#define clblasiCamax clblasiCamax_
+#define clblasiDamax clblasiDamax_
+#define clblasiSamax clblasiSamax_
+#define clblasiZamax clblasiZamax_
+
+#include <clBLAS.h>
+
+// generated by parser_clblas.py
+#undef clblasCaxpy
+//#define clblasCaxpy clblasCaxpy_pfn
+#undef clblasCcopy
+//#define clblasCcopy clblasCcopy_pfn
+#undef clblasCdotc
+//#define clblasCdotc clblasCdotc_pfn
+#undef clblasCdotu
+//#define clblasCdotu clblasCdotu_pfn
+#undef clblasCgbmv
+//#define clblasCgbmv clblasCgbmv_pfn
+#undef clblasCgemm
+#define clblasCgemm clblasCgemm_pfn
+#undef clblasCgemv
+//#define clblasCgemv clblasCgemv_pfn
+#undef clblasCgerc
+//#define clblasCgerc clblasCgerc_pfn
+#undef clblasCgeru
+//#define clblasCgeru clblasCgeru_pfn
+#undef clblasChbmv
+//#define clblasChbmv clblasChbmv_pfn
+#undef clblasChemm
+//#define clblasChemm clblasChemm_pfn
+#undef clblasChemv
+//#define clblasChemv clblasChemv_pfn
+#undef clblasCher
+//#define clblasCher clblasCher_pfn
+#undef clblasCher2
+//#define clblasCher2 clblasCher2_pfn
+#undef clblasCher2k
+//#define clblasCher2k clblasCher2k_pfn
+#undef clblasCherk
+//#define clblasCherk clblasCherk_pfn
+#undef clblasChpmv
+//#define clblasChpmv clblasChpmv_pfn
+#undef clblasChpr
+//#define clblasChpr clblasChpr_pfn
+#undef clblasChpr2
+//#define clblasChpr2 clblasChpr2_pfn
+#undef clblasCrotg
+//#define clblasCrotg clblasCrotg_pfn
+#undef clblasCscal
+//#define clblasCscal clblasCscal_pfn
+#undef clblasCsrot
+//#define clblasCsrot clblasCsrot_pfn
+#undef clblasCsscal
+//#define clblasCsscal clblasCsscal_pfn
+#undef clblasCswap
+//#define clblasCswap clblasCswap_pfn
+#undef clblasCsymm
+//#define clblasCsymm clblasCsymm_pfn
+#undef clblasCsyr2k
+//#define clblasCsyr2k clblasCsyr2k_pfn
+#undef clblasCsyrk
+//#define clblasCsyrk clblasCsyrk_pfn
+#undef clblasCtbmv
+//#define clblasCtbmv clblasCtbmv_pfn
+#undef clblasCtbsv
+//#define clblasCtbsv clblasCtbsv_pfn
+#undef clblasCtpmv
+//#define clblasCtpmv clblasCtpmv_pfn
+#undef clblasCtpsv
+//#define clblasCtpsv clblasCtpsv_pfn
+#undef clblasCtrmm
+//#define clblasCtrmm clblasCtrmm_pfn
+#undef clblasCtrmv
+//#define clblasCtrmv clblasCtrmv_pfn
+#undef clblasCtrsm
+//#define clblasCtrsm clblasCtrsm_pfn
+#undef clblasCtrsv
+//#define clblasCtrsv clblasCtrsv_pfn
+#undef clblasDasum
+//#define clblasDasum clblasDasum_pfn
+#undef clblasDaxpy
+//#define clblasDaxpy clblasDaxpy_pfn
+#undef clblasDcopy
+//#define clblasDcopy clblasDcopy_pfn
+#undef clblasDdot
+//#define clblasDdot clblasDdot_pfn
+#undef clblasDgbmv
+//#define clblasDgbmv clblasDgbmv_pfn
+#undef clblasDgemm
+#define clblasDgemm clblasDgemm_pfn
+#undef clblasDgemv
+//#define clblasDgemv clblasDgemv_pfn
+#undef clblasDger
+//#define clblasDger clblasDger_pfn
+#undef clblasDnrm2
+//#define clblasDnrm2 clblasDnrm2_pfn
+#undef clblasDrot
+//#define clblasDrot clblasDrot_pfn
+#undef clblasDrotg
+//#define clblasDrotg clblasDrotg_pfn
+#undef clblasDrotm
+//#define clblasDrotm clblasDrotm_pfn
+#undef clblasDrotmg
+//#define clblasDrotmg clblasDrotmg_pfn
+#undef clblasDsbmv
+//#define clblasDsbmv clblasDsbmv_pfn
+#undef clblasDscal
+//#define clblasDscal clblasDscal_pfn
+#undef clblasDspmv
+//#define clblasDspmv clblasDspmv_pfn
+#undef clblasDspr
+//#define clblasDspr clblasDspr_pfn
+#undef clblasDspr2
+//#define clblasDspr2 clblasDspr2_pfn
+#undef clblasDswap
+//#define clblasDswap clblasDswap_pfn
+#undef clblasDsymm
+//#define clblasDsymm clblasDsymm_pfn
+#undef clblasDsymv
+//#define clblasDsymv clblasDsymv_pfn
+#undef clblasDsyr
+//#define clblasDsyr clblasDsyr_pfn
+#undef clblasDsyr2
+//#define clblasDsyr2 clblasDsyr2_pfn
+#undef clblasDsyr2k
+//#define clblasDsyr2k clblasDsyr2k_pfn
+#undef clblasDsyrk
+//#define clblasDsyrk clblasDsyrk_pfn
+#undef clblasDtbmv
+//#define clblasDtbmv clblasDtbmv_pfn
+#undef clblasDtbsv
+//#define clblasDtbsv clblasDtbsv_pfn
+#undef clblasDtpmv
+//#define clblasDtpmv clblasDtpmv_pfn
+#undef clblasDtpsv
+//#define clblasDtpsv clblasDtpsv_pfn
+#undef clblasDtrmm
+//#define clblasDtrmm clblasDtrmm_pfn
+#undef clblasDtrmv
+//#define clblasDtrmv clblasDtrmv_pfn
+#undef clblasDtrsm
+//#define clblasDtrsm clblasDtrsm_pfn
+#undef clblasDtrsv
+//#define clblasDtrsv clblasDtrsv_pfn
+#undef clblasDzasum
+//#define clblasDzasum clblasDzasum_pfn
+#undef clblasDznrm2
+//#define clblasDznrm2 clblasDznrm2_pfn
+#undef clblasGetVersion
+//#define clblasGetVersion clblasGetVersion_pfn
+#undef clblasSasum
+//#define clblasSasum clblasSasum_pfn
+#undef clblasSaxpy
+//#define clblasSaxpy clblasSaxpy_pfn
+#undef clblasScasum
+//#define clblasScasum clblasScasum_pfn
+#undef clblasScnrm2
+//#define clblasScnrm2 clblasScnrm2_pfn
+#undef clblasScopy
+//#define clblasScopy clblasScopy_pfn
+#undef clblasSdot
+//#define clblasSdot clblasSdot_pfn
+#undef clblasSetup
+#define clblasSetup clblasSetup_pfn
+#undef clblasSgbmv
+//#define clblasSgbmv clblasSgbmv_pfn
+#undef clblasSgemm
+#define clblasSgemm clblasSgemm_pfn
+#undef clblasSgemv
+//#define clblasSgemv clblasSgemv_pfn
+#undef clblasSger
+//#define clblasSger clblasSger_pfn
+#undef clblasSnrm2
+//#define clblasSnrm2 clblasSnrm2_pfn
+#undef clblasSrot
+//#define clblasSrot clblasSrot_pfn
+#undef clblasSrotg
+//#define clblasSrotg clblasSrotg_pfn
+#undef clblasSrotm
+//#define clblasSrotm clblasSrotm_pfn
+#undef clblasSrotmg
+//#define clblasSrotmg clblasSrotmg_pfn
+#undef clblasSsbmv
+//#define clblasSsbmv clblasSsbmv_pfn
+#undef clblasSscal
+//#define clblasSscal clblasSscal_pfn
+#undef clblasSspmv
+//#define clblasSspmv clblasSspmv_pfn
+#undef clblasSspr
+//#define clblasSspr clblasSspr_pfn
+#undef clblasSspr2
+//#define clblasSspr2 clblasSspr2_pfn
+#undef clblasSswap
+//#define clblasSswap clblasSswap_pfn
+#undef clblasSsymm
+//#define clblasSsymm clblasSsymm_pfn
+#undef clblasSsymv
+//#define clblasSsymv clblasSsymv_pfn
+#undef clblasSsyr
+//#define clblasSsyr clblasSsyr_pfn
+#undef clblasSsyr2
+//#define clblasSsyr2 clblasSsyr2_pfn
+#undef clblasSsyr2k
+//#define clblasSsyr2k clblasSsyr2k_pfn
+#undef clblasSsyrk
+//#define clblasSsyrk clblasSsyrk_pfn
+#undef clblasStbmv
+//#define clblasStbmv clblasStbmv_pfn
+#undef clblasStbsv
+//#define clblasStbsv clblasStbsv_pfn
+#undef clblasStpmv
+//#define clblasStpmv clblasStpmv_pfn
+#undef clblasStpsv
+//#define clblasStpsv clblasStpsv_pfn
+#undef clblasStrmm
+//#define clblasStrmm clblasStrmm_pfn
+#undef clblasStrmv
+//#define clblasStrmv clblasStrmv_pfn
+#undef clblasStrsm
+//#define clblasStrsm clblasStrsm_pfn
+#undef clblasStrsv
+//#define clblasStrsv clblasStrsv_pfn
+#undef clblasTeardown
+#define clblasTeardown clblasTeardown_pfn
+#undef clblasZaxpy
+//#define clblasZaxpy clblasZaxpy_pfn
+#undef clblasZcopy
+//#define clblasZcopy clblasZcopy_pfn
+#undef clblasZdotc
+//#define clblasZdotc clblasZdotc_pfn
+#undef clblasZdotu
+//#define clblasZdotu clblasZdotu_pfn
+#undef clblasZdrot
+//#define clblasZdrot clblasZdrot_pfn
+#undef clblasZdscal
+//#define clblasZdscal clblasZdscal_pfn
+#undef clblasZgbmv
+//#define clblasZgbmv clblasZgbmv_pfn
+#undef clblasZgemm
+#define clblasZgemm clblasZgemm_pfn
+#undef clblasZgemv
+//#define clblasZgemv clblasZgemv_pfn
+#undef clblasZgerc
+//#define clblasZgerc clblasZgerc_pfn
+#undef clblasZgeru
+//#define clblasZgeru clblasZgeru_pfn
+#undef clblasZhbmv
+//#define clblasZhbmv clblasZhbmv_pfn
+#undef clblasZhemm
+//#define clblasZhemm clblasZhemm_pfn
+#undef clblasZhemv
+//#define clblasZhemv clblasZhemv_pfn
+#undef clblasZher
+//#define clblasZher clblasZher_pfn
+#undef clblasZher2
+//#define clblasZher2 clblasZher2_pfn
+#undef clblasZher2k
+//#define clblasZher2k clblasZher2k_pfn
+#undef clblasZherk
+//#define clblasZherk clblasZherk_pfn
+#undef clblasZhpmv
+//#define clblasZhpmv clblasZhpmv_pfn
+#undef clblasZhpr
+//#define clblasZhpr clblasZhpr_pfn
+#undef clblasZhpr2
+//#define clblasZhpr2 clblasZhpr2_pfn
+#undef clblasZrotg
+//#define clblasZrotg clblasZrotg_pfn
+#undef clblasZscal
+//#define clblasZscal clblasZscal_pfn
+#undef clblasZswap
+//#define clblasZswap clblasZswap_pfn
+#undef clblasZsymm
+//#define clblasZsymm clblasZsymm_pfn
+#undef clblasZsyr2k
+//#define clblasZsyr2k clblasZsyr2k_pfn
+#undef clblasZsyrk
+//#define clblasZsyrk clblasZsyrk_pfn
+#undef clblasZtbmv
+//#define clblasZtbmv clblasZtbmv_pfn
+#undef clblasZtbsv
+//#define clblasZtbsv clblasZtbsv_pfn
+#undef clblasZtpmv
+//#define clblasZtpmv clblasZtpmv_pfn
+#undef clblasZtpsv
+//#define clblasZtpsv clblasZtpsv_pfn
+#undef clblasZtrmm
+//#define clblasZtrmm clblasZtrmm_pfn
+#undef clblasZtrmv
+//#define clblasZtrmv clblasZtrmv_pfn
+#undef clblasZtrsm
+//#define clblasZtrsm clblasZtrsm_pfn
+#undef clblasZtrsv
+//#define clblasZtrsv clblasZtrsv_pfn
+#undef clblasiCamax
+//#define clblasiCamax clblasiCamax_pfn
+#undef clblasiDamax
+//#define clblasiDamax clblasiDamax_pfn
+#undef clblasiSamax
+//#define clblasiSamax clblasiSamax_pfn
+#undef clblasiZamax
+//#define clblasiZamax clblasiZamax_pfn
+
+// generated by parser_clblas.py
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgerc)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgeru)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemv)(clblasOrder order, clblasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDger)(clblasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clblasStatus (*clblasSetup)();
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSger)(clblasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT void (*clblasTeardown)();
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgerc)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgeru)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemv)(clblasOrder order, clblasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
diff --git a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp
new file mode 100644
index 000000000000..dff3b406a611
--- /dev/null
+++ b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp
@@ -0,0 +1,146 @@
+//
+// AUTOGENERATED, DO NOT EDIT
+//
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_clfft.py
+#define clfftBakePlan clfftBakePlan_
+#define clfftCopyPlan clfftCopyPlan_
+#define clfftCreateDefaultPlan clfftCreateDefaultPlan_
+#define clfftDestroyPlan clfftDestroyPlan_
+#define clfftEnqueueTransform clfftEnqueueTransform_
+#define clfftGetLayout clfftGetLayout_
+#define clfftGetPlanBatchSize clfftGetPlanBatchSize_
+#define clfftGetPlanContext clfftGetPlanContext_
+#define clfftGetPlanDim clfftGetPlanDim_
+#define clfftGetPlanDistance clfftGetPlanDistance_
+#define clfftGetPlanInStride clfftGetPlanInStride_
+#define clfftGetPlanLength clfftGetPlanLength_
+#define clfftGetPlanOutStride clfftGetPlanOutStride_
+#define clfftGetPlanPrecision clfftGetPlanPrecision_
+#define clfftGetPlanScale clfftGetPlanScale_
+#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_
+#define clfftGetResultLocation clfftGetResultLocation_
+#define clfftGetTmpBufSize clfftGetTmpBufSize_
+#define clfftGetVersion clfftGetVersion_
+#define clfftSetLayout clfftSetLayout_
+#define clfftSetPlanBatchSize clfftSetPlanBatchSize_
+#define clfftSetPlanCallback clfftSetPlanCallback_
+#define clfftSetPlanDim clfftSetPlanDim_
+#define clfftSetPlanDistance clfftSetPlanDistance_
+#define clfftSetPlanInStride clfftSetPlanInStride_
+#define clfftSetPlanLength clfftSetPlanLength_
+#define clfftSetPlanOutStride clfftSetPlanOutStride_
+#define clfftSetPlanPrecision clfftSetPlanPrecision_
+#define clfftSetPlanScale clfftSetPlanScale_
+#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_
+#define clfftSetResultLocation clfftSetResultLocation_
+#define clfftSetup clfftSetup_
+#define clfftTeardown clfftTeardown_
+
+#include <clFFT.h>
+
+// generated by parser_clfft.py
+#undef clfftBakePlan
+#define clfftBakePlan clfftBakePlan_pfn
+#undef clfftCopyPlan
+//#define clfftCopyPlan clfftCopyPlan_pfn
+#undef clfftCreateDefaultPlan
+#define clfftCreateDefaultPlan clfftCreateDefaultPlan_pfn
+#undef clfftDestroyPlan
+#define clfftDestroyPlan clfftDestroyPlan_pfn
+#undef clfftEnqueueTransform
+#define clfftEnqueueTransform clfftEnqueueTransform_pfn
+#undef clfftGetLayout
+//#define clfftGetLayout clfftGetLayout_pfn
+#undef clfftGetPlanBatchSize
+//#define clfftGetPlanBatchSize clfftGetPlanBatchSize_pfn
+#undef clfftGetPlanContext
+//#define clfftGetPlanContext clfftGetPlanContext_pfn
+#undef clfftGetPlanDim
+//#define clfftGetPlanDim clfftGetPlanDim_pfn
+#undef clfftGetPlanDistance
+//#define clfftGetPlanDistance clfftGetPlanDistance_pfn
+#undef clfftGetPlanInStride
+//#define clfftGetPlanInStride clfftGetPlanInStride_pfn
+#undef clfftGetPlanLength
+//#define clfftGetPlanLength clfftGetPlanLength_pfn
+#undef clfftGetPlanOutStride
+//#define clfftGetPlanOutStride clfftGetPlanOutStride_pfn
+#undef clfftGetPlanPrecision
+//#define clfftGetPlanPrecision clfftGetPlanPrecision_pfn
+#undef clfftGetPlanScale
+//#define clfftGetPlanScale clfftGetPlanScale_pfn
+#undef clfftGetPlanTransposeResult
+//#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_pfn
+#undef clfftGetResultLocation
+//#define clfftGetResultLocation clfftGetResultLocation_pfn
+#undef clfftGetTmpBufSize
+#define clfftGetTmpBufSize clfftGetTmpBufSize_pfn
+#undef clfftGetVersion
+#define clfftGetVersion clfftGetVersion_pfn
+#undef clfftSetLayout
+#define clfftSetLayout clfftSetLayout_pfn
+#undef clfftSetPlanBatchSize
+#define clfftSetPlanBatchSize clfftSetPlanBatchSize_pfn
+#undef clfftSetPlanCallback
+//#define clfftSetPlanCallback clfftSetPlanCallback_pfn
+#undef clfftSetPlanDim
+//#define clfftSetPlanDim clfftSetPlanDim_pfn
+#undef clfftSetPlanDistance
+#define clfftSetPlanDistance clfftSetPlanDistance_pfn
+#undef clfftSetPlanInStride
+#define clfftSetPlanInStride clfftSetPlanInStride_pfn
+#undef clfftSetPlanLength
+//#define clfftSetPlanLength clfftSetPlanLength_pfn
+#undef clfftSetPlanOutStride
+#define clfftSetPlanOutStride clfftSetPlanOutStride_pfn
+#undef clfftSetPlanPrecision
+#define clfftSetPlanPrecision clfftSetPlanPrecision_pfn
+#undef clfftSetPlanScale
+#define clfftSetPlanScale clfftSetPlanScale_pfn
+#undef clfftSetPlanTransposeResult
+//#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_pfn
+#undef clfftSetResultLocation
+#define clfftSetResultLocation clfftSetResultLocation_pfn
+#undef clfftSetup
+#define clfftSetup clfftSetup_pfn
+#undef clfftTeardown
+#define clfftTeardown clfftTeardown_pfn
+
+// generated by parser_clfft.py
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftBakePlan)(clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clfftPlanHandle plHandle, void* user_data), void* user_data);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftCopyPlan)(clfftPlanHandle* out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftCreateDefaultPlan)(clfftPlanHandle* plHandle, cl_context context, const clfftDim dim, const size_t* clLengths);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftDestroyPlan)(clfftPlanHandle* plHandle);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftEnqueueTransform)(clfftPlanHandle plHandle, clfftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetLayout)(const clfftPlanHandle plHandle, clfftLayout* iLayout, clfftLayout* oLayout);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanBatchSize)(const clfftPlanHandle plHandle, size_t* batchSize);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanContext)(const clfftPlanHandle plHandle, cl_context* context);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDim)(const clfftPlanHandle plHandle, clfftDim* dim, cl_uint* size);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDistance)(const clfftPlanHandle plHandle, size_t* iDist, size_t* oDist);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanInStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanLength)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clLengths);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanOutStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanPrecision)(const clfftPlanHandle plHandle, clfftPrecision* precision);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanScale)(const clfftPlanHandle plHandle, clfftDirection dir, cl_float* scale);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanTransposeResult)(const clfftPlanHandle plHandle, clfftResultTransposed* transposed);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetResultLocation)(const clfftPlanHandle plHandle, clfftResultLocation* placeness);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetTmpBufSize)(const clfftPlanHandle plHandle, size_t* buffersize);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetLayout)(clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanBatchSize)(clfftPlanHandle plHandle, size_t batchSize);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanCallback)(clfftPlanHandle plHandle, const char* funcName, const char* funcString, int localMemSize, clfftCallbackType callbackType, cl_mem* userdata, int numUserdataBuffers);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDim)(clfftPlanHandle plHandle, const clfftDim dim);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDistance)(clfftPlanHandle plHandle, size_t iDist, size_t oDist);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanInStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanLength)(clfftPlanHandle plHandle, const clfftDim dim, const size_t* clLengths);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanOutStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanPrecision)(clfftPlanHandle plHandle, clfftPrecision precision);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanScale)(clfftPlanHandle plHandle, clfftDirection dir, cl_float scale);
+//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanTransposeResult)(clfftPlanHandle plHandle, clfftResultTransposed transposed);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetResultLocation)(clfftPlanHandle plHandle, clfftResultLocation placeness);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetup)(const clfftSetupData* setupData);
+extern CL_RUNTIME_EXPORT clfftStatus (*clfftTeardown)();
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_clamdblas.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_clblas.hpp
similarity index 98%
rename from modules/core/include/opencv2/core/opencl/runtime/opencl_clamdblas.hpp
rename to modules/core/include/opencv2/core/opencl/runtime/opencl_clblas.hpp
index 2ad8ac0b5db8..ccddf8f76c19 100644
--- a/modules/core/include/opencv2/core/opencl/runtime/opencl_clamdblas.hpp
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_clblas.hpp
@@ -46,7 +46,7 @@
#include "opencl_core.hpp"
-#include "autogenerated/opencl_clamdblas.hpp"
+#include "autogenerated/opencl_clblas.hpp"
#endif // HAVE_CLAMDBLAS
diff --git a/modules/core/include/opencv2/core/opencl/runtime/opencl_clamdfft.hpp b/modules/core/include/opencv2/core/opencl/runtime/opencl_clfft.hpp
similarity index 98%
rename from modules/core/include/opencv2/core/opencl/runtime/opencl_clamdfft.hpp
rename to modules/core/include/opencv2/core/opencl/runtime/opencl_clfft.hpp
index a328f722fcca..7f4af5e60b7e 100644
--- a/modules/core/include/opencv2/core/opencl/runtime/opencl_clamdfft.hpp
+++ b/modules/core/include/opencv2/core/opencl/runtime/opencl_clfft.hpp
@@ -46,7 +46,7 @@
#include "opencl_core.hpp"
-#include "autogenerated/opencl_clamdfft.hpp"
+#include "autogenerated/opencl_clfft.hpp"
#endif // HAVE_CLAMDFFT
diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h
index 97aeab375ffd..32f3c8c99998 100644
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -358,7 +358,11 @@ _IplImage
needed for correct deallocation */
#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
- _IplImage() {}
+ _IplImage()
+ {
+ memset(this, 0, sizeof(*this)); // valid for POD structure
+ nSize = sizeof(IplImage);
+ }
_IplImage(const cv::Mat& m) { *this = cvIplImage(m); }
#endif
}
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index f0368027aa6a..108c0d93e749 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -714,9 +714,27 @@ void Mat::forEach_impl(const Functor& operation) {
/////////////////////////// Synchronization Primitives ///////////////////////////////
#if !defined(_M_CEE)
+#ifndef OPENCV_DISABLE_THREAD_SUPPORT
typedef std::recursive_mutex Mutex;
typedef std::lock_guard<cv::Mutex> AutoLock;
-#endif
+#else // OPENCV_DISABLE_THREAD_SUPPORT
+// Custom (failing) implementation of `std::recursive_mutex`.
+struct Mutex {
+ void lock(){
+ CV_Error(cv::Error::StsNotImplemented,
+ "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON");
+ }
+ void unlock(){
+ CV_Error(cv::Error::StsNotImplemented,
+ "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON");
+ }
+};
+// Stub for cv::AutoLock when threads are disabled.
+struct AutoLock {
+ AutoLock(Mutex &) { }
+};
+#endif // OPENCV_DISABLE_THREAD_SUPPORT
+#endif // !defined(_M_CEE)
/** @brief Designed for command line parsing
diff --git a/modules/core/include/opencv2/core/utils/filesystem.private.hpp b/modules/core/include/opencv2/core/utils/filesystem.private.hpp
index ea2591c9de1d..72b2bb947968 100644
--- a/modules/core/include/opencv2/core/utils/filesystem.private.hpp
+++ b/modules/core/include/opencv2/core/utils/filesystem.private.hpp
@@ -16,8 +16,8 @@
# define OPENCV_HAVE_FILESYSTEM_SUPPORT 1
# elif defined(__APPLE__)
# include <TargetConditionals.h>
-# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (!defined(TARGET_OS_OSX) && !TARGET_OS_IPHONE)
-# define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 // OSX only
+# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (defined(TARGET_OS_IOS) && TARGET_OS_IOS)
+# define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 // OSX, iOS only
# endif
# else
/* unknown */
diff --git a/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp b/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp
index bc3ae4d08a7a..d6390fc74a48 100644
--- a/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp
+++ b/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp
@@ -80,7 +80,9 @@ LibHandle_t libraryLoad_(const FileSystemPath_t& filename)
return LoadLibraryW(filename.c_str());
#endif
#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__)
- return dlopen(filename.c_str(), RTLD_NOW);
+ void* handle = dlopen(filename.c_str(), RTLD_NOW);
+ CV_LOG_IF_DEBUG(NULL, !handle, "dlopen() error: " << dlerror());
+ return handle;
#endif
}
diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp
index ad6c36157736..f627d7147265 100644
--- a/modules/core/include/opencv2/core/version.hpp
+++ b/modules/core/include/opencv2/core/version.hpp
@@ -7,7 +7,7 @@
#define CV_VERSION_MAJOR 4
#define CV_VERSION_MINOR 5
-#define CV_VERSION_REVISION 2
+#define CV_VERSION_REVISION 3
#define CV_VERSION_STATUS "-dev"
#define CVAUX_STR_EXP(__A) #__A
diff --git a/modules/core/misc/java/src/java/core+Mat.java b/modules/core/misc/java/src/java/core+Mat.java
index 641d9f8ae843..5fcc72773873 100644
--- a/modules/core/misc/java/src/java/core+Mat.java
+++ b/modules/core/misc/java/src/java/core+Mat.java
@@ -1128,6 +1128,458 @@ public int width() {
return cols();
}
+ // javadoc:Mat::at(clazz, row, col)
+ @SuppressWarnings("unchecked")
+ public <T> Atable<T> at(Class<T> clazz, int row, int col) {
+ if (clazz == Byte.class || clazz == byte.class) {
+ return (Atable)new AtableByte(this, row, col);
+ } else if (clazz == Double.class || clazz == double.class) {
+ return (Atable)new AtableDouble(this, row, col);
+ } else if (clazz == Float.class || clazz == float.class) {
+ return (Atable)new AtableFloat(this, row, col);
+ } else if (clazz == Integer.class || clazz == int.class) {
+ return (Atable)new AtableInteger(this, row, col);
+ } else if (clazz == Short.class || clazz == short.class) {
+ return (Atable)new AtableShort(this, row, col);
+ } else {
+ throw new RuntimeException("Unsupported class type");
+ }
+ }
+
+ // javadoc:Mat::at(clazz, idx)
+ @SuppressWarnings("unchecked")
+ public <T> Atable<T> at(Class<T> clazz, int[] idx) {
+ if (clazz == Byte.class || clazz == byte.class) {
+ return (Atable)new AtableByte(this, idx);
+ } else if (clazz == Double.class || clazz == double.class) {
+ return (Atable)new AtableDouble(this, idx);
+ } else if (clazz == Float.class || clazz == float.class) {
+ return (Atable)new AtableFloat(this, idx);
+ } else if (clazz == Integer.class || clazz == int.class) {
+ return (Atable)new AtableInteger(this, idx);
+ } else if (clazz == Short.class || clazz == short.class) {
+ return (Atable)new AtableShort(this, idx);
+ } else {
+ throw new RuntimeException("Unsupported class parameter");
+ }
+ }
+
+ public static class Tuple2<T> {
+ public Tuple2(T _0, T _1) {
+ this._0 = _0;
+ this._1 = _1;
+ }
+
+ public T get_0() {
+ return _0;
+ }
+
+ public T get_1() {
+ return _1;
+ }
+
+ private final T _0;
+ private final T _1;
+ }
+
+ public static class Tuple3<T> {
+ public Tuple3(T _0, T _1, T _2) {
+ this._0 = _0;
+ this._1 = _1;
+ this._2 = _2;
+ }
+
+ public T get_0() {
+ return _0;
+ }
+
+ public T get_1() {
+ return _1;
+ }
+
+ public T get_2() {
+ return _2;
+ }
+
+ private final T _0;
+ private final T _1;
+ private final T _2;
+ }
+
+ public static class Tuple4<T> {
+ public Tuple4(T _0, T _1, T _2, T _3) {
+ this._0 = _0;
+ this._1 = _1;
+ this._2 = _2;
+ this._3 = _3;
+ }
+
+ public T get_0() {
+ return _0;
+ }
+
+ public T get_1() {
+ return _1;
+ }
+
+ public T get_2() {
+ return _2;
+ }
+
+ public T get_3() {
+ return _3;
+ }
+
+ private final T _0;
+ private final T _1;
+ private final T _2;
+ private final T _3;
+ }
+
+ public interface Atable<T> {
+ T getV();
+ void setV(T v);
+ Tuple2<T> getV2c();
+ void setV2c(Tuple2<T> v);
+ Tuple3<T> getV3c();
+ void setV3c(Tuple3<T> v);
+ Tuple4<T> getV4c();
+ void setV4c(Tuple4<T> v);
+ }
+
+ private static class AtableBase {
+
+ protected AtableBase(Mat mat, int row, int col) {
+ this.mat = mat;
+ indices = new int[2];
+ indices[0] = row;
+ indices[1] = col;
+ }
+
+ protected AtableBase(Mat mat, int[] indices) {
+ this.mat = mat;
+ this.indices = indices;
+ }
+
+ protected final Mat mat;
+ protected final int[] indices;
+ }
+
+ private static class AtableByte extends AtableBase implements Atable<Byte> {
+
+ public AtableByte(Mat mat, int row, int col) {
+ super(mat, row, col);
+ }
+
+ public AtableByte(Mat mat, int[] indices) {
+ super(mat, indices);
+ }
+
+ @Override
+ public Byte getV() {
+ byte[] data = new byte[1];
+ mat.get(indices, data);
+ return data[0];
+ }
+
+ @Override
+ public void setV(Byte v) {
+ byte[] data = new byte[] { v };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple2 getV2c() {
+ byte[] data = new byte[2];
+ mat.get(indices, data);
+ return new Tuple2(data[0], data[1]);
+ }
+
+ @Override
+ public void setV2c(Tuple2 v) {
+ byte[] data = new byte[] { v._0, v._1 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple3 getV3c() {
+ byte[] data = new byte[3];
+ mat.get(indices, data);
+ return new Tuple3(data[0], data[1], data[2]);
+ }
+
+ @Override
+ public void setV3c(Tuple3 v) {
+ byte[] data = new byte[] { v._0, v._1, v._2 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple4 getV4c() {
+ byte[] data = new byte[4];
+ mat.get(indices, data);
+ return new Tuple4(data[0], data[1], data[2], data[3]);
+ }
+
+ @Override
+ public void setV4c(Tuple4 v) {
+ byte[] data = new byte[] { v._0, v._1, v._2, v._3 };
+ mat.put(indices, data);
+ }
+ }
+
+ private static class AtableDouble extends AtableBase implements Atable<Double> {
+
+ public AtableDouble(Mat mat, int row, int col) {
+ super(mat, row, col);
+ }
+
+ public AtableDouble(Mat mat, int[] indices) {
+ super(mat, indices);
+ }
+
+ @Override
+ public Double getV() {
+ double[] data = new double[1];
+ mat.get(indices, data);
+ return data[0];
+ }
+
+ @Override
+ public void setV(Double v) {
+ double[] data = new double[] { v };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple2 getV2c() {
+ double[] data = new double[2];
+ mat.get(indices, data);
+ return new Tuple2(data[0], data[1]);
+ }
+
+ @Override
+ public void setV2c(Tuple2 v) {
+ double[] data = new double[] { v._0, v._1 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple3 getV3c() {
+ double[] data = new double[3];
+ mat.get(indices, data);
+ return new Tuple3(data[0], data[1], data[2]);
+ }
+
+ @Override
+ public void setV3c(Tuple3 v) {
+ double[] data = new double[] { v._0, v._1, v._2 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple4 getV4c() {
+ double[] data = new double[4];
+ mat.get(indices, data);
+ return new Tuple4(data[0], data[1], data[2], data[3]);
+ }
+
+ @Override
+ public void setV4c(Tuple4 v) {
+ double[] data = new double[] { v._0, v._1, v._2, v._3 };
+ mat.put(indices, data);
+ }
+ }
+
+ private static class AtableFloat extends AtableBase implements Atable<Float> {
+
+ public AtableFloat(Mat mat, int row, int col) {
+ super(mat, row, col);
+ }
+
+ public AtableFloat(Mat mat, int[] indices) {
+ super(mat, indices);
+ }
+
+ @Override
+ public Float getV() {
+ float[] data = new float[1];
+ mat.get(indices, data);
+ return data[0];
+ }
+
+ @Override
+ public void setV(Float v) {
+ float[] data = new float[] { v };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple2 getV2c() {
+ float[] data = new float[2];
+ mat.get(indices, data);
+ return new Tuple2(data[0], data[1]);
+ }
+
+ @Override
+ public void setV2c(Tuple2 v) {
+ float[] data = new float[] { v._0, v._1 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple3 getV3c() {
+ float[] data = new float[3];
+ mat.get(indices, data);
+ return new Tuple3(data[0], data[1], data[2]);
+ }
+
+ @Override
+ public void setV3c(Tuple3 v) {
+ float[] data = new float[] { v._0, v._1, v._2 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple4 getV4c() {
+ float[] data = new float[4];
+ mat.get(indices, data);
+ return new Tuple4(data[0], data[1], data[2], data[3]);
+ }
+
+ @Override
+ public void setV4c(Tuple4 v) {
+ float[] data = new float[] { v._0, v._1, v._2, v._3 };
+ mat.put(indices, data);
+ }
+ }
+
+ private static class AtableInteger extends AtableBase implements Atable<Integer> {
+
+ public AtableInteger(Mat mat, int row, int col) {
+ super(mat, row, col);
+ }
+
+ public AtableInteger(Mat mat, int[] indices) {
+ super(mat, indices);
+ }
+
+ @Override
+ public Integer getV() {
+ int[] data = new int[1];
+ mat.get(indices, data);
+ return data[0];
+ }
+
+ @Override
+ public void setV(Integer v) {
+ int[] data = new int[] { v };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple2 getV2c() {
+ int[] data = new int[2];
+ mat.get(indices, data);
+ return new Tuple2(data[0], data[1]);
+ }
+
+ @Override
+ public void setV2c(Tuple2 v) {
+ int[] data = new int[] { v._0, v._1 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple3 getV3c() {
+ int[] data = new int[3];
+ mat.get(indices, data);
+ return new Tuple3(data[0], data[1], data[2]);
+ }
+
+ @Override
+ public void setV3c(Tuple3 v) {
+ int[] data = new int[] { v._0, v._1, v._2 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple4 getV4c() {
+ int[] data = new int[4];
+ mat.get(indices, data);
+ return new Tuple4(data[0], data[1], data[2], data[3]);
+ }
+
+ @Override
+ public void setV4c(Tuple4 v) {
+ int[] data = new int[] { v._0, v._1, v._2, v._3 };
+ mat.put(indices, data);
+ }
+ }
+
+ private static class AtableShort extends AtableBase implements Atable<Short> {
+
+ public AtableShort(Mat mat, int row, int col) {
+ super(mat, row, col);
+ }
+
+ public AtableShort(Mat mat, int[] indices) {
+ super(mat, indices);
+ }
+
+ @Override
+ public Short getV() {
+ short[] data = new short[1];
+ mat.get(indices, data);
+ return data[0];
+ }
+
+ @Override
+ public void setV(Short v) {
+ short[] data = new short[] { v };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple2 getV2c() {
+ short[] data = new short[2];
+ mat.get(indices, data);
+ return new Tuple2(data[0], data[1]);
+ }
+
+ @Override
+ public void setV2c(Tuple2 v) {
+ short[] data = new short[] { v._0, v._1 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple3 getV3c() {
+ short[] data = new short[3];
+ mat.get(indices, data);
+ return new Tuple3(data[0], data[1], data[2]);
+ }
+
+ @Override
+ public void setV3c(Tuple3 v) {
+ short[] data = new short[] { v._0, v._1, v._2 };
+ mat.put(indices, data);
+ }
+
+ @Override
+ public Tuple4 getV4c() {
+ short[] data = new short[4];
+ mat.get(indices, data);
+ return new Tuple4(data[0], data[1], data[2], data[3]);
+ }
+
+ @Override
+ public void setV4c(Tuple4 v) {
+ short[] data = new short[] { v._0, v._1, v._2, v._3 };
+ mat.put(indices, data);
+ }
+ }
+
// javadoc:Mat::getNativeObjAddr()
public long getNativeObjAddr() {
return nativeObj;
diff --git a/modules/core/misc/java/src/java/core+MatAt.kt b/modules/core/misc/java/src/java/core+MatAt.kt
new file mode 100644
index 000000000000..c81e21057f27
--- /dev/null
+++ b/modules/core/misc/java/src/java/core+MatAt.kt
@@ -0,0 +1,160 @@
+package org.opencv.core
+
+import org.opencv.core.Mat.*
+import java.lang.RuntimeException
+
+fun Mat.get(row: Int, col: Int, data: UByteArray) = this.get(row, col, data.asByteArray())
+fun Mat.get(indices: IntArray, data: UByteArray) = this.get(indices, data.asByteArray())
+fun Mat.put(row: Int, col: Int, data: UByteArray) = this.put(row, col, data.asByteArray())
+fun Mat.put(indices: IntArray, data: UByteArray) = this.put(indices, data.asByteArray())
+
+fun Mat.get(row: Int, col: Int, data: UShortArray) = this.get(row, col, data.asShortArray())
+fun Mat.get(indices: IntArray, data: UShortArray) = this.get(indices, data.asShortArray())
+fun Mat.put(row: Int, col: Int, data: UShortArray) = this.put(row, col, data.asShortArray())
+fun Mat.put(indices: IntArray, data: UShortArray) = this.put(indices, data.asShortArray())
+
+/***
+ * Example use:
+ *
+ * val (b, g, r) = mat.at<UByte>(50, 50).v3c
+ * mat.at<UByte>(50, 50).v3c = T3(245u, 113u, 34u)
+ *
+ */
+@Suppress("UNCHECKED_CAST")
+inline fun <reified T> Mat.at(row: Int, col: Int) : Atable<T> =
+ when (T::class) {
+ Byte::class, Double::class, Float::class, Int::class, Short::class -> this.at(
+ T::class.java,
+ row,
+ col
+ )
+ UByte::class -> AtableUByte(this, row, col) as Atable<T>
+ UShort::class -> AtableUShort(this, row, col) as Atable<T>
+ else -> throw RuntimeException("Unsupported class type")
+ }
+
+@Suppress("UNCHECKED_CAST")
+inline fun <reified T> Mat.at(idx: IntArray) : Atable<T> =
+ when (T::class) {
+ Byte::class, Double::class, Float::class, Int::class, Short::class -> this.at(
+ T::class.java,
+ idx
+ )
+ UByte::class -> AtableUByte(this, idx) as Atable<T>
+ UShort::class -> AtableUShort(this, idx) as Atable<T>
+ else -> throw RuntimeException("Unsupported class type")
+ }
+
+class AtableUByte(val mat: Mat, val indices: IntArray): Atable<UByte> {
+
+ constructor(mat: Mat, row: Int, col: Int) : this(mat, intArrayOf(row, col))
+
+ override fun getV(): UByte {
+ val data = UByteArray(1)
+ mat.get(indices, data)
+ return data[0]
+ }
+
+ override fun setV(v: UByte) {
+ val data = ubyteArrayOf(v)
+ mat.put(indices, data)
+ }
+
+ override fun getV2c(): Tuple2 {
+ val data = UByteArray(2)
+ mat.get(indices, data)
+ return Tuple2(data[0], data[1])
+ }
+
+ override fun setV2c(v: Tuple2) {
+ val data = ubyteArrayOf(v._0, v._1)
+ mat.put(indices, data)
+ }
+
+ override fun getV3c(): Tuple3 {
+ val data = UByteArray(3)
+ mat.get(indices, data)
+ return Tuple3(data[0], data[1], data[2])
+ }
+
+ override fun setV3c(v: Tuple3) {
+ val data = ubyteArrayOf(v._0, v._1, v._2)
+ mat.put(indices, data)
+ }
+
+ override fun getV4c(): Tuple4 {
+ val data = UByteArray(4)
+ mat.get(indices, data)
+ return Tuple4(data[0], data[1], data[2], data[3])
+ }
+
+ override fun setV4c(v: Tuple4) {
+ val data = ubyteArrayOf(v._0, v._1, v._2, v._3)
+ mat.put(indices, data)
+ }
+}
+
+class AtableUShort(val mat: Mat, val indices: IntArray): Atable<UShort> {
+
+ constructor(mat: Mat, row: Int, col: Int) : this(mat, intArrayOf(row, col))
+
+ override fun getV(): UShort {
+ val data = UShortArray(1)
+ mat.get(indices, data)
+ return data[0]
+ }
+
+ override fun setV(v: UShort) {
+ val data = ushortArrayOf(v)
+ mat.put(indices, data)
+ }
+
+ override fun getV2c(): Tuple2 {
+ val data = UShortArray(2)
+ mat.get(indices, data)
+ return Tuple2(data[0], data[1])
+ }
+
+ override fun setV2c(v: Tuple2) {
+ val data = ushortArrayOf(v._0, v._1)
+ mat.put(indices, data)
+ }
+
+ override fun getV3c(): Tuple3 {
+ val data = UShortArray(3)
+ mat.get(indices, data)
+ return Tuple3(data[0], data[1], data[2])
+ }
+
+ override fun setV3c(v: Tuple3) {
+ val data = ushortArrayOf(v._0, v._1, v._2)
+ mat.put(indices, data)
+ }
+
+ override fun getV4c(): Tuple4 {
+ val data = UShortArray(4)
+ mat.get(indices, data)
+ return Tuple4(data[0], data[1], data[2], data[3])
+ }
+
+ override fun setV4c(v: Tuple4) {
+ val data = ushortArrayOf(v._0, v._1, v._2, v._3)
+ mat.put(indices, data)
+ }
+}
+
+operator fun <T> Tuple2<T>.component1(): T = this._0
+operator fun <T> Tuple2<T>.component2(): T = this._1
+
+operator fun <T> Tuple3<T>.component1(): T = this._0
+operator fun <T> Tuple3<T>.component2(): T = this._1
+operator fun <T> Tuple3<T>.component3(): T = this._2
+
+operator fun <T> Tuple4<T>.component1(): T = this._0
+operator fun <T> Tuple4<T>.component2(): T = this._1
+operator fun <T> Tuple4<T>.component3(): T = this._2
+operator fun <T> Tuple4<T>.component4(): T = this._3
+
+fun <T> T2(_0: T, _1: T) : Tuple2<T> = Tuple2(_0, _1)
+fun <T> T3(_0: T, _1: T, _2: T) : Tuple3<T> = Tuple3(_0, _1, _2)
+fun <T> T4(_0: T, _1: T, _2: T, _3: T) : Tuple4<T> = Tuple4(_0, _1, _2, _3)
diff --git a/modules/core/misc/java/test/MatTest.java b/modules/core/misc/java/test/MatTest.java
index 00e7b7cb3203..3075dba16b35 100644
--- a/modules/core/misc/java/test/MatTest.java
+++ b/modules/core/misc/java/test/MatTest.java
@@ -1285,4 +1285,31 @@ public void testMatFromByteBufferWithStep() {
assertEquals(5, bbuf.get(63*80 + 63));
}
+ public void testMatAt() {
+ Mat uc1 = new Mat(2, 3, CvType.CV_8S) {
+ {
+ put(0, 0, 1, 2, 3);
+ put(1, 0, 4, 5, 6);
+ }
+ };
+ assertEquals((byte)1, uc1.at(Byte.class, 0, 0).getV().byteValue());
+ assertEquals((byte)2, uc1.at(Byte.class, 0, 1).getV().byteValue());
+ assertEquals((byte)3, uc1.at(Byte.class, 0, 2).getV().byteValue());
+ assertEquals((byte)4, uc1.at(Byte.class, 1, 0).getV().byteValue());
+ assertEquals((byte)5, uc1.at(Byte.class, 1, 1).getV().byteValue());
+ assertEquals((byte)6, uc1.at(Byte.class, 1, 2).getV().byteValue());
+ uc1.at(Byte.class, 0, 0).setV((byte)7);
+ uc1.at(Byte.class, 0, 1).setV((byte)8);
+ uc1.at(Byte.class, 0, 2).setV((byte)9);
+ uc1.at(Byte.class, 1, 0).setV((byte)10);
+ uc1.at(Byte.class, 1, 1).setV((byte)11);
+ uc1.at(Byte.class, 1, 2).setV((byte)12);
+ byte[] data = new byte[6];
+ uc1.get(0, 0, data);
+ assertArrayEquals(data, new byte[] {7, 8, 9, 10, 11, 12});
+ Mat.Tuple3<Byte> bgr = rgbLena.at(Byte.class, 0, 0).getV3c();
+ assertEquals(bgr.get_0().byteValue(), (byte)128);
+ assertEquals(bgr.get_1().byteValue(), (byte)138);
+ assertEquals(bgr.get_2().byteValue(), (byte)225);
+ }
}
diff --git a/modules/core/misc/objc/common/Mat.mm b/modules/core/misc/objc/common/Mat.mm
index 5d41a3622e71..045bd8393ea3 100644
--- a/modules/core/misc/objc/common/Mat.mm
+++ b/modules/core/misc/objc/common/Mat.mm
@@ -548,7 +548,7 @@ - (void)put:(uchar*)dest data:(NSArray*)data offset:(int)offset count
if (depth == CV_8U) {
putData(dest, count, ^uchar (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} );
} else if (depth == CV_8S) {
- putData(dest, count, ^char (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} );
+ putData(dest, count, ^schar (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} );
} else if (depth == CV_16U) {
putData(dest, count, ^ushort (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} );
} else if (depth == CV_16S) {
diff --git a/modules/core/misc/objc/common/MatExt.swift b/modules/core/misc/objc/common/MatExt.swift
index f6b3072345ec..a6ba548599d8 100644
--- a/modules/core/misc/objc/common/MatExt.swift
+++ b/modules/core/misc/objc/common/MatExt.swift
@@ -33,6 +33,10 @@ func throwIncompatibleBufferSize(count: Int, channels: Int32) throws {
)
}
+public typealias T2<T> = (T, T)
+public typealias T3<T> = (T, T, T)
+public typealias T4<T> = (T, T, T, T)
+
public extension Mat {
convenience init(rows:Int32, cols:Int32, type:Int32, data:[Int8]) {
@@ -58,6 +62,21 @@ public extension Mat {
}
}
+ @discardableResult func get(indices:[Int32], data:inout [UInt8]) throws -> Int32 {
+ let channels = CvType.channels(Int32(type()))
+ if Int32(data.count) % channels != 0 {
+ try throwIncompatibleBufferSize(count: data.count, channels: channels)
+ } else if depth() != CvType.CV_8U {
+ try throwIncompatibleDataType(typeName: CvType.type(toString: type()))
+ }
+ let count = Int32(data.count)
+ return data.withUnsafeMutableBufferPointer { body in
+ body.withMemoryRebound(to: Int8.self) { reboundBody in
+ return __get(indices as [NSNumber], count: count, byteBuffer: reboundBody.baseAddress!)
+ }
+ }
+ }
+
@discardableResult func get(indices:[Int32], data:inout [Double]) throws -> Int32 {
let channels = CvType.channels(Int32(type()))
if Int32(data.count) % channels != 0 {
@@ -110,10 +129,29 @@ public extension Mat {
}
}
+ @discardableResult func get(indices:[Int32], data:inout [UInt16]) throws -> Int32 {
+ let channels = CvType.channels(Int32(type()))
+ if Int32(data.count) % channels != 0 {
+ try throwIncompatibleBufferSize(count: data.count, channels: channels)
+ } else if depth() != CvType.CV_16U {
+ try throwIncompatibleDataType(typeName: CvType.type(toString: type()))
+ }
+ let count = Int32(data.count)
+ return data.withUnsafeMutableBufferPointer { body in
+ body.withMemoryRebound(to: Int16.self) { reboundBody in
+ return __get(indices as [NSNumber], count: count, shortBuffer: reboundBody.baseAddress!)
+ }
+ }
+ }
+
@discardableResult func get(row: Int32, col: Int32, data:inout [Int8]) throws -> Int32 {
return try get(indices: [row, col], data: &data)
}
+ @discardableResult func get(row: Int32, col: Int32, data:inout [UInt8]) throws -> Int32 {
+ return try get(indices: [row, col], data: &data)
+ }
+
@discardableResult func get(row: Int32, col: Int32, data:inout [Double]) throws -> Int32 {
return try get(indices: [row, col], data: &data)
}
@@ -130,6 +168,10 @@ public extension Mat {
return try get(indices: [row, col], data: &data)
}
+ @discardableResult func get(row: Int32, col: Int32, data:inout [UInt16]) throws -> Int32 {
+ return try get(indices: [row, col], data: &data)
+ }
+
@discardableResult func put(indices:[Int32], data:[Int8]) throws -> Int32 {
let channels = CvType.channels(Int32(type()))
if Int32(data.count) % channels != 0 {
@@ -143,6 +185,21 @@ public extension Mat {
}
}
+ @discardableResult func put(indices:[Int32], data:[UInt8]) throws -> Int32 {
+ let channels = CvType.channels(Int32(type()))
+ if Int32(data.count) % channels != 0 {
+ try throwIncompatibleBufferSize(count: data.count, channels: channels)
+ } else if depth() != CvType.CV_8U {
+ try throwIncompatibleDataType(typeName: CvType.type(toString: type()))
+ }
+ let count = Int32(data.count)
+ return data.withUnsafeBufferPointer { body in
+ body.withMemoryRebound(to: Int8.self) { reboundBody in
+ return __put(indices as [NSNumber], count: count, byteBuffer: reboundBody.baseAddress!)
+ }
+ }
+ }
+
@discardableResult func put(indices:[Int32], data:[Int8], offset: Int, length: Int32) throws -> Int32 {
let channels = CvType.channels(Int32(type()))
if Int32(data.count) % channels != 0 {
@@ -210,10 +267,29 @@ public extension Mat {
}
}
+ @discardableResult func put(indices:[Int32], data:[UInt16]) throws -> Int32 {
+ let channels = CvType.channels(Int32(type()))
+ if Int32(data.count) % channels != 0 {
+ try throwIncompatibleBufferSize(count: data.count, channels: channels)
+ } else if depth() != CvType.CV_16U {
+ try throwIncompatibleDataType(typeName: CvType.type(toString: type()))
+ }
+ let count = Int32(data.count)
+ return data.withUnsafeBufferPointer { body in
+ body.withMemoryRebound(to: Int16.self) { reboundBody in
+ return __put(indices as [NSNumber], count: count, shortBuffer: reboundBody.baseAddress!)
+ }
+ }
+ }
+
@discardableResult func put(row: Int32, col: Int32, data:[Int8]) throws -> Int32 {
return try put(indices: [row, col], data: data)
}
+ @discardableResult func put(row: Int32, col: Int32, data:[UInt8]) throws -> Int32 {
+ return try put(indices: [row, col], data: data)
+ }
+
@discardableResult func put(row: Int32, col: Int32, data: [Int8], offset: Int, length: Int32) throws -> Int32 {
return try put(indices: [row, col], data: data, offset: offset, length: length)
}
@@ -234,6 +310,10 @@ public extension Mat {
return try put(indices: [row, col], data: data)
}
+ @discardableResult func put(row: Int32, col: Int32, data: [UInt16]) throws -> Int32 {
+ return try put(indices: [row, col], data: data)
+ }
+
@discardableResult func get(row: Int32, col: Int32) -> [Double] {
return get(indices: [row, col])
}
@@ -242,3 +322,396 @@ public extension Mat {
return __get(indices as [NSNumber]) as! [Double]
}
}
+
+public protocol Atable {
+ static func getAt(m: Mat, indices:[Int32]) -> Self
+ static func putAt(m: Mat, indices:[Int32], v: Self)
+ static func getAt2c(m: Mat, indices:[Int32]) -> (Self, Self)
+ static func putAt2c(m: Mat, indices:[Int32], v: (Self, Self))
+ static func getAt3c(m: Mat, indices:[Int32]) -> (Self, Self, Self)
+ static func putAt3c(m: Mat, indices:[Int32], v: (Self, Self, Self))
+ static func getAt4c(m: Mat, indices:[Int32]) -> (Self, Self, Self, Self)
+ static func putAt4c(m: Mat, indices:[Int32], v: (Self, Self, Self, Self))
+}
+
+public class MatAt<N: Atable> {
+
+ init(mat: Mat, indices: [Int32]) {
+ self.mat = mat
+ self.indices = indices
+ }
+
+ private let mat: Mat
+ private let indices: [Int32]
+ public var v: N {
+ get {
+ return N.getAt(m: mat, indices: indices)
+ }
+ set(value) {
+ N.putAt(m: mat, indices: indices, v: value)
+ }
+ }
+ public var v2c: (N, N) {
+ get {
+ return N.getAt2c(m: mat, indices: indices)
+ }
+ set(value) {
+ N.putAt2c(m: mat, indices: indices, v: value)
+ }
+ }
+ public var v3c: (N, N, N) {
+ get {
+ return N.getAt3c(m: mat, indices: indices)
+ }
+ set(value) {
+ N.putAt3c(m: mat, indices: indices, v: value)
+ }
+ }
+ public var v4c: (N, N, N, N) {
+ get {
+ return N.getAt4c(m: mat, indices: indices)
+ }
+ set(value) {
+ N.putAt4c(m: mat, indices: indices, v: value)
+ }
+ }
+}
+
+extension UInt8: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> UInt8 {
+ var tmp = [UInt8](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: UInt8) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (UInt8, UInt8) {
+ var tmp = [UInt8](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (UInt8, UInt8)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (UInt8, UInt8, UInt8) {
+ var tmp = [UInt8](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (UInt8, UInt8, UInt8)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (UInt8, UInt8, UInt8, UInt8) {
+ var tmp = [UInt8](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (UInt8, UInt8, UInt8, UInt8)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension Int8: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> Int8 {
+ var tmp = [Int8](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: Int8) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (Int8, Int8) {
+ var tmp = [Int8](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (Int8, Int8)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (Int8, Int8, Int8) {
+ var tmp = [Int8](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (Int8, Int8, Int8)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (Int8, Int8, Int8, Int8) {
+ var tmp = [Int8](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (Int8, Int8, Int8, Int8)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension Double: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> Double {
+ var tmp = [Double](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: Double) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (Double, Double) {
+ var tmp = [Double](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (Double, Double)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (Double, Double, Double) {
+ var tmp = [Double](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (Double, Double, Double)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (Double, Double, Double, Double) {
+ var tmp = [Double](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (Double, Double, Double, Double)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension Float: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> Float {
+ var tmp = [Float](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: Float) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (Float, Float) {
+ var tmp = [Float](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (Float, Float)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (Float, Float, Float) {
+ var tmp = [Float](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (Float, Float, Float)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (Float, Float, Float, Float) {
+ var tmp = [Float](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (Float, Float, Float, Float)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension Int32: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> Int32 {
+ var tmp = [Int32](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: Int32) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (Int32, Int32) {
+ var tmp = [Int32](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (Int32, Int32)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (Int32, Int32, Int32) {
+ var tmp = [Int32](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (Int32, Int32, Int32)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (Int32, Int32, Int32, Int32) {
+ var tmp = [Int32](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (Int32, Int32, Int32, Int32)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension UInt16: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> UInt16 {
+ var tmp = [UInt16](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: UInt16) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (UInt16, UInt16) {
+ var tmp = [UInt16](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (UInt16, UInt16)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (UInt16, UInt16, UInt16) {
+ var tmp = [UInt16](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (UInt16, UInt16, UInt16)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (UInt16, UInt16, UInt16, UInt16) {
+ var tmp = [UInt16](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (UInt16, UInt16, UInt16, UInt16)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+extension Int16: Atable {
+ public static func getAt(m: Mat, indices:[Int32]) -> Int16 {
+ var tmp = [Int16](repeating: 0, count: 1)
+ try! m.get(indices: indices, data: &tmp)
+ return tmp[0]
+ }
+
+ public static func putAt(m: Mat, indices: [Int32], v: Int16) {
+ let tmp = [v]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt2c(m: Mat, indices:[Int32]) -> (Int16, Int16) {
+ var tmp = [Int16](repeating: 0, count: 2)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1])
+ }
+
+ public static func putAt2c(m: Mat, indices: [Int32], v: (Int16, Int16)) {
+ let tmp = [v.0, v.1]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt3c(m: Mat, indices:[Int32]) -> (Int16, Int16, Int16) {
+ var tmp = [Int16](repeating: 0, count: 3)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2])
+ }
+
+ public static func putAt3c(m: Mat, indices: [Int32], v: (Int16, Int16, Int16)) {
+ let tmp = [v.0, v.1, v.2]
+ try! m.put(indices: indices, data: tmp)
+ }
+
+ public static func getAt4c(m: Mat, indices:[Int32]) -> (Int16, Int16, Int16, Int16) {
+ var tmp = [Int16](repeating: 0, count: 4)
+ try! m.get(indices: indices, data: &tmp)
+ return (tmp[0], tmp[1], tmp[2], tmp[3])
+ }
+
+ public static func putAt4c(m: Mat, indices: [Int32], v: (Int16, Int16, Int16, Int16)) {
+ let tmp = [v.0, v.1, v.2, v.3]
+ try! m.put(indices: indices, data: tmp)
+ }
+}
+
+/***
+ * Example use:
+ *
+ * let elemantVal: UInt8 = mat.at(row: 50, col: 50).v
+ * mat.at(row: 50, col: 50).v = 245
+ *
+ */
+public extension Mat {
+ func at<N: Atable>(row: Int32, col: Int32) -> MatAt<N> {
+ return MatAt(mat: self, indices: [row, col])
+ }
+
+ func at<N: Atable>(indices:[Int32]) -> MatAt<N> {
+ return MatAt(mat: self, indices: indices)
+ }
+}
diff --git a/modules/core/misc/objc/test/MatTest.swift b/modules/core/misc/objc/test/MatTest.swift
index af26eb0bdb20..8a513505cc14 100644
--- a/modules/core/misc/objc/test/MatTest.swift
+++ b/modules/core/misc/objc/test/MatTest.swift
@@ -308,15 +308,15 @@ class MatTests: OpenCVTestCase {
XCTAssert([340] == sm.get(row: 1, col: 1))
}
- func testGetIntIntByteArray() throws {
- let m = try getTestMat(size: 5, type: CvType.CV_8UC3)
+ func testGetIntIntInt8Array() throws {
+ let m = try getTestMat(size: 5, type: CvType.CV_8SC3)
var goodData = [Int8](repeating: 0, count: 9)
// whole Mat
var bytesNum = try m.get(row: 1, col: 1, data: &goodData)
XCTAssertEqual(9, bytesNum)
- XCTAssert([110, 111, 112, 120, 121, 122, -126, -125, -124] == goodData)
+ XCTAssert([110, 111, 112, 120, 121, 122, 127, 127, 127] == goodData)
var badData = [Int8](repeating: 0, count: 7)
XCTAssertThrowsError(bytesNum = try m.get(row: 0, col: 0, data: &badData))
@@ -326,11 +326,36 @@ class MatTests: OpenCVTestCase {
var buff00 = [Int8](repeating: 0, count: 3)
bytesNum = try sm.get(row: 0, col: 0, data: &buff00)
XCTAssertEqual(3, bytesNum)
- XCTAssert(buff00 == [-26, -25, -24])
+ XCTAssert(buff00 == [127, 127, 127])
var buff11 = [Int8](repeating: 0, count: 3)
bytesNum = try sm.get(row: 1, col: 1, data: &buff11)
XCTAssertEqual(3, bytesNum)
- XCTAssert(buff11 == [-1, -1, -1])
+ XCTAssert(buff11 == [127, 127, 127])
+ }
+
+ func testGetIntIntUInt8Array() throws {
+ let m = try getTestMat(size: 5, type: CvType.CV_8UC3)
+ var goodData = [UInt8](repeating: 0, count: 9)
+
+ // whole Mat
+ var bytesNum = try m.get(row: 1, col: 1, data: &goodData)
+
+ XCTAssertEqual(9, bytesNum)
+ XCTAssert([110, 111, 112, 120, 121, 122, 130, 131, 132] == goodData)
+
+ var badData = [UInt8](repeating: 0, count: 7)
+ XCTAssertThrowsError(bytesNum = try m.get(row: 0, col: 0, data: &badData))
+
+ // sub-Mat
+ let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5)
+ var buff00 = [UInt8](repeating: 0, count: 3)
+ bytesNum = try sm.get(row: 0, col: 0, data: &buff00)
+ XCTAssertEqual(3, bytesNum)
+ XCTAssert(buff00 == [230, 231, 232])
+ var buff11 = [UInt8](repeating: 0, count: 3)
+ bytesNum = try sm.get(row: 1, col: 1, data: &buff11)
+ XCTAssertEqual(3, bytesNum)
+ XCTAssert(buff11 == [255, 255, 255])
}
func testGetIntIntDoubleArray() throws {
@@ -399,7 +424,7 @@ class MatTests: OpenCVTestCase {
XCTAssert(buff11 == [340, 341, 0, 0])
}
- func testGetIntIntShortArray() throws {
+ func testGetIntIntInt16Array() throws {
let m = try getTestMat(size: 5, type: CvType.CV_16SC2)
var buff = [Int16](repeating: 0, count: 6)
@@ -421,6 +446,28 @@ class MatTests: OpenCVTestCase {
XCTAssert(buff11 == [340, 341, 0, 0])
}
+ func testGetIntIntUInt16Array() throws {
+ let m = try getTestMat(size: 5, type: CvType.CV_16UC2)
+ var buff = [UInt16](repeating: 0, count: 6)
+
+ // whole Mat
+ var bytesNum = try m.get(row: 1, col: 1, data: &buff)
+
+ XCTAssertEqual(12, bytesNum);
+ XCTAssert(buff == [110, 111, 120, 121, 130, 131])
+
+ // sub-Mat
+ let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5)
+ var buff00 = [UInt16](repeating: 0, count: 4)
+ bytesNum = try sm.get(row: 0, col: 0, data: &buff00)
+ XCTAssertEqual(8, bytesNum)
+ XCTAssert(buff00 == [230, 231, 240, 241])
+ var buff11 = [UInt16](repeating: 0, count: 4)
+ bytesNum = try sm.get(row: 1, col: 1, data: &buff11)
+ XCTAssertEqual(4, bytesNum);
+ XCTAssert(buff11 == [340, 341, 0, 0])
+ }
+
func testHeight() {
XCTAssertEqual(gray0.rows(), gray0.height())
XCTAssertEqual(rgbLena.rows(), rgbLena.height())
@@ -653,7 +700,7 @@ class MatTests: OpenCVTestCase {
try assertMatEqual(truth!, m1, OpenCVTestCase.EPS)
}
- func testPutIntIntByteArray() throws {
+ func testPutIntIntInt8Array() throws {
let m = Mat(rows: 5, cols: 5, type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3))
let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5)
var buff = [Int8](repeating: 0, count: 6)
@@ -683,7 +730,37 @@ class MatTests: OpenCVTestCase {
XCTAssert(buff == buff0)
}
- func testPutIntArrayByteArray() throws {
+ func testPutIntIntUInt8Array() throws {
+ let m = Mat(rows: 5, cols: 5, type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3))
+ let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5)
+ var buff = [UInt8](repeating: 0, count: 6)
+ let buff0:[UInt8] = [10, 20, 30, 40, 50, 60]
+ let buff1:[UInt8] = [255, 254, 253, 252, 251, 250]
+
+ var bytesNum = try m.put(row:1, col:2, data:buff0)
+
+ XCTAssertEqual(6, bytesNum)
+ bytesNum = try m.get(row: 1, col: 2, data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff0)
+
+ bytesNum = try sm.put(row:0, col:0, data:buff1)
+
+ XCTAssertEqual(6, bytesNum)
+ bytesNum = try sm.get(row: 0, col: 0, data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff1)
+ bytesNum = try m.get(row: 2, col: 3, data: &buff)
+ XCTAssertEqual(6, bytesNum);
+ XCTAssert(buff == buff1)
+
+ let m1 = m.row(1)
+ bytesNum = try m1.get(row: 0, col: 2, data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff0)
+ }
+
+ func testPutIntArrayInt8Array() throws {
let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3))
let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)])
var buff = [Int8](repeating: 0, count: 6)
@@ -714,10 +791,41 @@ class MatTests: OpenCVTestCase {
XCTAssert(buff == buff0)
}
+ func testPutIntArrayUInt8Array() throws {
+ let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3))
+ let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)])
+ var buff = [UInt8](repeating: 0, count: 6)
+ let buff0:[UInt8] = [10, 20, 30, 40, 50, 60]
+ let buff1:[UInt8] = [255, 254, 253, 252, 251, 250]
+
+ var bytesNum = try m.put(indices:[1, 2, 0], data:buff0)
+
+ XCTAssertEqual(6, bytesNum)
+ bytesNum = try m.get(indices: [1, 2, 0], data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff0)
+
+ bytesNum = try sm.put(indices: [0, 0, 0], data: buff1)
+
+ XCTAssertEqual(6, bytesNum)
+ bytesNum = try sm.get(indices: [0, 0, 0], data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff1)
+
+ bytesNum = try m.get(indices: [0, 1, 2], data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff1)
+
+ let m1 = m.submat(ranges: [Range(start: 1,end: 2), Range.all(), Range.all()])
+ bytesNum = try m1.get(indices: [0, 2, 0], data: &buff)
+ XCTAssertEqual(6, bytesNum)
+ XCTAssert(buff == buff0)
+ }
+
func testPutIntIntDoubleArray() throws {
- let m = Mat(rows: 5, cols: 5, type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3))
+ let m = Mat(rows: 5, cols: 5, type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3))
let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5)
- var buff = [Int8](repeating: 0, count: 6)
+ var buff = [UInt8](repeating: 0, count: 6)
var bytesNum = try m.put(row: 1, col: 2, data: [10, 20, 30, 40, 50, 60] as [Double])
@@ -731,16 +839,16 @@ class MatTests: OpenCVTestCase {
XCTAssertEqual(6, bytesNum)
bytesNum = try sm.get(row: 0, col: 0, data: &buff)
XCTAssertEqual(6, bytesNum);
- XCTAssert(buff == [-1, -2, -3, -4, -5, -6])
+ XCTAssert(buff == [255, 254, 253, 252, 251, 250])
bytesNum = try m.get(row: 2, col: 3, data: &buff)
XCTAssertEqual(6, bytesNum);
- XCTAssert(buff == [-1, -2, -3, -4, -5, -6])
+ XCTAssert(buff == [255, 254, 253, 252, 251, 250])
}
func testPutIntArrayDoubleArray() throws {
- let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3))
+ let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3))
let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)])
- var buff = [Int8](repeating: 0, count: 6)
+ var buff = [UInt8](repeating: 0, count: 6)
var bytesNum = try m.put(indices: [1, 2, 0], data: [10, 20, 30, 40, 50, 60] as [Double])
@@ -754,10 +862,10 @@ class MatTests: OpenCVTestCase {
XCTAssertEqual(6, bytesNum);
bytesNum = try sm.get(indices: [0, 0, 0], data: &buff)
XCTAssertEqual(6, bytesNum);
- XCTAssert(buff == [-1, -2, -3, -4, -5, -6])
+ XCTAssert(buff == [255, 254, 253, 252, 251, 250])
bytesNum = try m.get(indices: [0, 1, 2], data: &buff)
XCTAssertEqual(6, bytesNum)
- XCTAssert(buff == [-1, -2, -3, -4, -5, -6])
+ XCTAssert(buff == [255, 254, 253, 252, 251, 250])
}
func testPutIntIntFloatArray() throws {
@@ -820,7 +928,7 @@ class MatTests: OpenCVTestCase {
XCTAssert([40, 50, 60] == m.get(indices: [0, 1, 0]))
}
- func testPutIntIntShortArray() throws {
+ func testPutIntIntInt16Array() throws {
let m = Mat(rows: 5, cols: 5, type: CvType.CV_16SC3, scalar: Scalar(-1, -2, -3))
let elements: [Int16] = [ 10, 20, 30, 40, 50, 60]
@@ -834,7 +942,21 @@ class MatTests: OpenCVTestCase {
XCTAssert([40, 50, 60] == m.get(row: 2, col: 4))
}
- func testPutIntArrayShortArray() throws {
+ func testPutIntIntUInt16Array() throws {
+ let m = Mat(rows: 5, cols: 5, type: CvType.CV_16UC3, scalar: Scalar(-1, -2, -3))
+ let elements: [UInt16] = [ 10, 20, 30, 40, 50, 60]
+
+ var bytesNum = try m.put(row: 2, col: 3, data: elements)
+
+ XCTAssertEqual(Int32(elements.count * 2), bytesNum)
+ let m1 = m.col(3)
+ var buff = [UInt16](repeating: 0, count: 3)
+ bytesNum = try m1.get(row: 2, col: 0, data: &buff)
+ XCTAssert(buff == [10, 20, 30])
+ XCTAssert([40, 50, 60] == m.get(row: 2, col: 4))
+ }
+
+ func testPutIntArrayInt16Array() throws {
let m = Mat(sizes: [5, 5, 5], type: CvType.CV_16SC3, scalar: Scalar(-1, -2, -3))
let elements: [Int16] = [ 10, 20, 30, 40, 50, 60]
@@ -848,6 +970,20 @@ class MatTests: OpenCVTestCase {
XCTAssert([40, 50, 60] == m.get(indices: [0, 2, 4]))
}
+ func testPutIntArrayUInt16Array() throws {
+ let m = Mat(sizes: [5, 5, 5], type: CvType.CV_16UC3, scalar: Scalar(-1, -2, -3))
+ let elements: [UInt16] = [ 10, 20, 30, 40, 50, 60]
+
+ var bytesNum = try m.put(indices: [0, 2, 3], data: elements)
+
+ XCTAssertEqual(Int32(elements.count * 2), bytesNum)
+ let m1 = m.submat(ranges: [Range.all(), Range.all(), Range(start: 3, end: 4)])
+ var buff = [UInt16](repeating: 0, count: 3)
+ bytesNum = try m1.get(indices: [0, 2, 0], data: &buff)
+ XCTAssert(buff == [10, 20, 30])
+ XCTAssert([40, 50, 60] == m.get(indices: [0, 2, 4]))
+ }
+
func testReshapeInt() throws {
let src = Mat(rows: 4, cols: 4, type: CvType.CV_8U, scalar: Scalar(0))
dst = src.reshape(channels: 4)
@@ -1143,4 +1279,28 @@ class MatTests: OpenCVTestCase {
XCTAssertEqual(5, bufferOut[63*80 + 63])
}
+ func testMatAt() {
+ let uc1 = Mat(rows: 2, cols: 3, type: CvType.CV_8U)
+ try! uc1.put(row: 0, col: 0, data: [1, 2, 3, 4, 5, 6] as [Int8])
+ XCTAssertEqual(UInt8(1), uc1.at(row: 0, col: 0).v)
+ XCTAssertEqual(UInt8(2), uc1.at(row: 0, col: 1).v)
+ XCTAssertEqual(UInt8(3), uc1.at(row: 0, col: 2).v)
+ XCTAssertEqual(UInt8(4), uc1.at(row: 1, col: 0).v)
+ XCTAssertEqual(UInt8(5), uc1.at(row: 1, col: 1).v)
+ XCTAssertEqual(UInt8(6), uc1.at(row: 1, col: 2).v)
+ uc1.at(row: 0, col: 0).v = UInt8(7)
+ uc1.at(row: 0, col: 1).v = UInt8(8)
+ uc1.at(row: 0, col: 2).v = UInt8(9)
+ uc1.at(row: 1, col: 0).v = UInt8(10)
+ uc1.at(row: 1, col: 1).v = UInt8(11)
+ uc1.at(row: 1, col: 2).v = UInt8(12)
+ var data = [Int8](repeating: 0, count: 6)
+ try! uc1.get(row: 0, col: 0, data: &data)
+ XCTAssertEqual(data, [7, 8, 9, 10, 11, 12] as [Int8])
+ let (b, g, r): T3 = rgbLena.at(row: 0, col: 0).v3c
+ XCTAssertEqual(b, UInt8(128))
+ XCTAssertEqual(g, UInt8(138))
+ XCTAssertEqual(r, UInt8(225))
+ }
+
}
diff --git a/modules/core/perf/opencl/perf_usage_flags.cpp b/modules/core/perf/opencl/perf_usage_flags.cpp
index d59087121f51..0717121d1cf7 100644
--- a/modules/core/perf/opencl/perf_usage_flags.cpp
+++ b/modules/core/perf/opencl/perf_usage_flags.cpp
@@ -12,25 +12,33 @@
namespace opencv_test {
namespace ocl {
-typedef TestBaseWithParam > UsageFlagsBoolFixture;
-
-OCL_PERF_TEST_P(UsageFlagsBoolFixture, UsageFlags_AllocHostMem, ::testing::Combine(OCL_TEST_SIZES, Bool()))
+typedef TestBaseWithParam> SizeUsageFlagsFixture;
+
+OCL_PERF_TEST_P(SizeUsageFlagsFixture, UsageFlags_AllocMem,
+ ::testing::Combine(
+ OCL_TEST_SIZES,
+ testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+ testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+ testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY) // USAGE_ALLOCATE_SHARED_MEMORY
+ ))
{
Size sz = get<0>(GetParam());
- bool allocHostMem = get<1>(GetParam());
+ UMatUsageFlags srcAllocMem = get<1>(GetParam());
+ UMatUsageFlags dstAllocMem = get<2>(GetParam());
+ UMatUsageFlags finalAllocMem = get<3>(GetParam());
- UMat src(sz, CV_8UC1, Scalar::all(128));
+ UMat src(sz, CV_8UC1, Scalar::all(128), srcAllocMem);
OCL_TEST_CYCLE()
{
- UMat dst(allocHostMem ? USAGE_ALLOCATE_HOST_MEMORY : USAGE_DEFAULT);
+ UMat dst(dstAllocMem);
cv::add(src, Scalar::all(1), dst);
{
Mat canvas = dst.getMat(ACCESS_RW);
cv::putText(canvas, "Test", Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar::all(255));
}
- UMat final;
+ UMat final(finalAllocMem);
cv::subtract(dst, Scalar::all(1), final);
}
diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp
index 0cddc909985c..f88597aacc68 100644
--- a/modules/core/src/arithm.simd.hpp
+++ b/modules/core/src/arithm.simd.hpp
@@ -1910,4 +1910,4 @@ DEFINE_SIMD_ALL(recip, recip_loop)
#define SIMD_GUARD
#endif
-}} // cv::hal::
\ No newline at end of file
+}} // cv::hal::
diff --git a/modules/core/src/async.cpp b/modules/core/src/async.cpp
index a2f4612365b9..78c0a1ee8116 100644
--- a/modules/core/src/async.cpp
+++ b/modules/core/src/async.cpp
@@ -14,6 +14,7 @@
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
#include
+#ifndef OPENCV_DISABLE_THREAD_SUPPORT
#ifdef CV_CXX11
#include
@@ -236,6 +237,171 @@ struct AsyncArray::Impl
}
};
+} // namespace
+
+#else // OPENCV_DISABLE_THREAD_SUPPORT
+
+namespace cv {
+
+// no threading
+struct AsyncArray::Impl
+{
+ int refcount;
+ void addrefFuture() CV_NOEXCEPT { refcount_future++; refcount++; }
+ void releaseFuture() CV_NOEXCEPT { refcount_future--; if (0 == --refcount) delete this; }
+ int refcount_future;
+ void addrefPromise() CV_NOEXCEPT { refcount_promise++; refcount++; } \
+ void releasePromise() CV_NOEXCEPT { refcount_promise--; if (0 == --refcount) delete this; }
+ int refcount_promise;
+
+ mutable bool has_result; // Mat, UMat or exception
+
+ mutable cv::Ptr result_mat;
+ mutable cv::Ptr result_umat;
+
+
+ bool has_exception;
+#if CV__EXCEPTION_PTR
+ std::exception_ptr exception;
+#endif
+ cv::Exception cv_exception;
+
+ mutable bool result_is_fetched;
+
+ bool future_is_returned;
+
+ Impl()
+ : refcount(1), refcount_future(0), refcount_promise(1)
+ , has_result(false)
+ , has_exception(false)
+ , result_is_fetched(false)
+ , future_is_returned(false)
+ {
+ // nothing
+ }
+
+ ~Impl()
+ {
+ if (has_result && !result_is_fetched)
+ {
+ CV_LOG_INFO(NULL, "Asynchronous result has not been fetched");
+ }
+ }
+
+ bool get(OutputArray dst, int64 timeoutNs) const
+ {
+ CV_Assert(!result_is_fetched);
+ if (!has_result)
+ {
+ CV_UNUSED(timeoutNs);
+ CV_Error(Error::StsError, "Result is not produced (unable to wait for result in OPENCV_DISABLE_THREAD_SUPPORT mode)");
+ }
+ if (!result_mat.empty())
+ {
+ dst.move(*result_mat.get());
+ result_mat.release();
+ result_is_fetched = true;
+ return true;
+ }
+ if (!result_umat.empty())
+ {
+ dst.move(*result_umat.get());
+ result_umat.release();
+ result_is_fetched = true;
+ return true;
+ }
+#if CV__EXCEPTION_PTR
+ if (has_exception && exception)
+ {
+ result_is_fetched = true;
+ std::rethrow_exception(exception);
+ }
+#endif
+ if (has_exception)
+ {
+ result_is_fetched = true;
+ throw cv_exception;
+ }
+ CV_Error(Error::StsInternal, "AsyncArray: invalid state of 'has_result = true'");
+ return false;
+ }
+
+ bool valid() const CV_NOEXCEPT
+ {
+ if (result_is_fetched)
+ return false;
+ if (refcount_promise == 0 && !has_result)
+ return false;
+ return true;
+ }
+
+ bool wait_for(int64 timeoutNs) const
+ {
+ CV_Assert(valid());
+ if (has_result)
+ return has_result;
+ if (timeoutNs == 0)
+ return has_result;
+ CV_Error(Error::StsError, "Unable to wait in OPENCV_DISABLE_THREAD_SUPPORT mode");
+ }
+
+ AsyncArray getArrayResult()
+ {
+ CV_Assert(refcount_future == 0);
+ AsyncArray result;
+ addrefFuture();
+ result.p = this;
+ future_is_returned = true;
+ return result;
+ }
+
+ void setValue(InputArray value)
+ {
+ if (future_is_returned && refcount_future == 0)
+ CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
+ CV_Assert(!has_result);
+ int k = value.kind();
+ if (k == _InputArray::UMAT)
+ {
+ result_umat = makePtr();
+ value.copyTo(*result_umat.get());
+ }
+ else
+ {
+ result_mat = makePtr();
+ value.copyTo(*result_mat.get());
+ }
+ has_result = true;
+ }
+
+#if CV__EXCEPTION_PTR
+ void setException(std::exception_ptr e)
+ {
+ if (future_is_returned && refcount_future == 0)
+ CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
+ CV_Assert(!has_result);
+ has_exception = true;
+ exception = e;
+ has_result = true;
+ }
+#endif
+
+ void setException(const cv::Exception e)
+ {
+ if (future_is_returned && refcount_future == 0)
+ CV_Error(Error::StsError, "Associated AsyncArray has been destroyed");
+ CV_Assert(!has_result);
+ has_exception = true;
+ cv_exception = e;
+ has_result = true;
+ }
+};
+
+}
+
+#endif // OPENCV_DISABLE_THREAD_SUPPORT
+
+namespace cv {
AsyncArray::AsyncArray() CV_NOEXCEPT
: p(NULL)
diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp
index d17adc6b48ae..2dbc3e27635e 100644
--- a/modules/core/src/directx.cpp
+++ b/modules/core/src/directx.cpp
@@ -80,15 +80,15 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
case DXGI_FORMAT_R32G32B32_UINT:
case DXGI_FORMAT_R32G32B32_SINT: return CV_32SC3;
//case DXGI_FORMAT_R16G16B16A16_TYPELESS:
- //case DXGI_FORMAT_R16G16B16A16_FLOAT:
+ case DXGI_FORMAT_R16G16B16A16_FLOAT: return CV_16FC4;
case DXGI_FORMAT_R16G16B16A16_UNORM:
case DXGI_FORMAT_R16G16B16A16_UINT: return CV_16UC4;
case DXGI_FORMAT_R16G16B16A16_SNORM:
case DXGI_FORMAT_R16G16B16A16_SINT: return CV_16SC4;
//case DXGI_FORMAT_R32G32_TYPELESS:
- //case DXGI_FORMAT_R32G32_FLOAT:
- //case DXGI_FORMAT_R32G32_UINT:
- //case DXGI_FORMAT_R32G32_SINT:
+ case DXGI_FORMAT_R32G32_FLOAT: return CV_32FC2;
+ case DXGI_FORMAT_R32G32_UINT:
+ case DXGI_FORMAT_R32G32_SINT: return CV_32SC2;
//case DXGI_FORMAT_R32G8X24_TYPELESS:
//case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
//case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
@@ -104,13 +104,13 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
case DXGI_FORMAT_R8G8B8A8_SNORM:
case DXGI_FORMAT_R8G8B8A8_SINT: return CV_8SC4;
//case DXGI_FORMAT_R16G16_TYPELESS:
- //case DXGI_FORMAT_R16G16_FLOAT:
+ case DXGI_FORMAT_R16G16_FLOAT: return CV_16FC2;
case DXGI_FORMAT_R16G16_UNORM:
case DXGI_FORMAT_R16G16_UINT: return CV_16UC2;
case DXGI_FORMAT_R16G16_SNORM:
case DXGI_FORMAT_R16G16_SINT: return CV_16SC2;
//case DXGI_FORMAT_R32_TYPELESS:
- //case DXGI_FORMAT_D32_FLOAT:
+ case DXGI_FORMAT_D32_FLOAT:
case DXGI_FORMAT_R32_FLOAT: return CV_32FC1;
case DXGI_FORMAT_R32_UINT:
case DXGI_FORMAT_R32_SINT: return CV_32SC1;
@@ -124,7 +124,7 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
case DXGI_FORMAT_R8G8_SNORM:
case DXGI_FORMAT_R8G8_SINT: return CV_8SC2;
//case DXGI_FORMAT_R16_TYPELESS:
- //case DXGI_FORMAT_R16_FLOAT:
+ case DXGI_FORMAT_R16_FLOAT: return CV_16FC1;
case DXGI_FORMAT_D16_UNORM:
case DXGI_FORMAT_R16_UNORM:
case DXGI_FORMAT_R16_UINT: return CV_16UC1;
@@ -138,8 +138,8 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
case DXGI_FORMAT_A8_UNORM: return CV_8UC1;
//case DXGI_FORMAT_R1_UNORM:
//case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
- //case DXGI_FORMAT_R8G8_B8G8_UNORM:
- //case DXGI_FORMAT_G8R8_G8B8_UNORM:
+ case DXGI_FORMAT_R8G8_B8G8_UNORM:
+ case DXGI_FORMAT_G8R8_G8B8_UNORM: return CV_8UC4;
//case DXGI_FORMAT_BC1_TYPELESS:
//case DXGI_FORMAT_BC1_UNORM:
//case DXGI_FORMAT_BC1_UNORM_SRGB:
diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp
index 87873666d910..ab42a40a4428 100644
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -40,7 +40,7 @@
//M*/
#include "precomp.hpp"
-#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
+#include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencl_kernels_core.hpp"
#include