From dce2921eaa9431bba219e0675f925124df43a05d Mon Sep 17 00:00:00 2001 From: Adrian Kierzkowski Date: Fri, 26 Apr 2024 21:31:19 +0200 Subject: [PATCH] Refs #738. WIP. Matrix multiplication via OpenCL. Working on OpenCL buffers caching when passing matrices into kernel arguments. Got rid of templated version of Run() and RunMany(). --- Matrix.mqh | 207 +++++++++++++----------------- OpenCL.h | 298 ++++++++++--------------------------------- tests/OpenCLTest.mq5 | 5 +- 3 files changed, 159 insertions(+), 351 deletions(-) diff --git a/Matrix.mqh b/Matrix.mqh index eea71b0a3..45fbf5b6b 100644 --- a/Matrix.mqh +++ b/Matrix.mqh @@ -643,7 +643,7 @@ class MatrixDimension { } /** - * Extracts dimensions's values to the given array. Used internally. + * Extracts dimensions' values to the given array. Used internally. */ void FillArray(X& array[], int& offset) { int i; @@ -732,44 +732,6 @@ class MatrixDimension { enum ENUM_MATRIX_FLAGS { MATRIX_FLAGS_NONE, MATRIX_FLAGS_USE_OPENCL }; -/** - * Buffer used for CL operations. - */ -template -class MatrixOpenCLBuffer : public Dynamic { - // Flattened matrix data. - ARRAY(X, data); - - // Current version of the data. - long version; - - // CL buffer. - Ref buffer; - - public: - /** - * Constructor. - */ - MatrixOpenCLBuffer(int _size, unsigned int _flags) { - version = 0; - buffer = OpenCL::Alloc(_size, _flags); - ArrayResize(data, _size); - } - - /** - * Prepares buffer to be used by CL. Copies flattened data from the given matrix into buffer. - */ - void FillData(const Matrix& src) { - src.GetRawArray(data); - buffer REF_DEREF Write(data, version); - } - - /** - * Returns pointer to the CL buffer. - */ - OpenCLBuffer* GetBuffer() { return buffer.Ptr(); } -}; - /** * Matrix class. */ @@ -779,10 +741,14 @@ class Matrix { // First/root dimension. MatrixDimension* ptr_first_dimension; +#ifdef MATRIX_USE_OPENCL + // Map of data size -> CL buffer to be used e.g., by CL-based MatMul method. 
- static DictStruct>> cl_buffers_in_0; - static DictStruct>> cl_buffers_in_1; - static DictStruct>> cl_buffers_out; + static DictStruct> cl_buffers_in_0; + static DictStruct> cl_buffers_in_1; + static DictStruct> cl_buffers_out; + +#endif // Array with declaration of items per matrix's dimension. int dimensions[MATRIX_DIMENSIONS]; @@ -796,14 +762,25 @@ class Matrix { // Flags. int flags; - // Unique id of the matrix. - int uuid; + // Static counter, so each new matrix will have its own version. For new + // matrices a new 32-bit range of versions is given and it should be more + // than enough. + static unsigned long version_counter; + + // Cache of previously flattened data. + ARRAY(X, flattened_cache); - // Static counter, so each matrix will have its own uuid. - static int uuid_counter; + // Version of the data that was flattened. + unsigned long flattened_cache_version; - // OpenCL program. + // Version of the data stored in dimensions arrays. Incremented after each + // change to this matrix. + unsigned long version; + + // OpenCL program for multi-core MatMul. static Ref cl_program_matmul; + + // OpenCL program for single-core MatMul. static Ref cl_program_matmul_single; /** @@ -814,7 +791,7 @@ class Matrix { #ifdef MATRIX_USE_OPENCL InitializeOpenCL(); #endif - uuid = uuid_counter++; + Initialize(); } /** @@ -826,7 +803,7 @@ class Matrix { #ifdef MATRIX_USE_OPENCL InitializeOpenCL(); #endif - uuid = uuid_counter++; + Initialize(); } /** @@ -837,7 +814,7 @@ class Matrix { #ifdef MATRIX_USE_OPENCL InitializeOpenCL(); #endif - uuid = uuid_counter++; + Initialize(); } /** @@ -853,8 +830,9 @@ class Matrix { InitializeOpenCL(); #endif - // We mark new matrix as unique one, even though we clone another matrix. - uuid = uuid_counter++; + // Note that we mark new matrix as unique one, even though we clone another + // matrix. 
+ Initialize(); } /** @@ -864,6 +842,18 @@ class Matrix { private: Matrix(const Matrix* _right) {} + /** + * Initializes new or copy of another matrix. + */ + void Initialize() { + // Cache will have a version lower than the data, so the matrix will be flattened at the first occasion. + flattened_cache_version = version_counter; + version = version_counter + 1; + + // Each new matrix will have its own 32-bit range of versions. + version_counter += UINT_MAX; + } + #ifdef MATRIX_USE_OPENCL /** @@ -891,13 +881,13 @@ class Matrix { /** * Returns/allocs and returns buffer of the given size to be used in CL operations as first input parameter. */ - static MatrixOpenCLBuffer* GetCLBufferInArg0(int _size) { - Ref> _buffer; + static OpenCLBuffer* GetCLBufferInArg0(int _size) { + Ref _buffer; _buffer = cl_buffers_in_0.GetByKey(_size, _buffer); if (!_buffer.IsSet()) { - _buffer = new MatrixOpenCLBuffer(_size, CL_MEM_READ_ONLY); + _buffer = new OpenCLBuffer(_size, CL_MEM_READ_ONLY); cl_buffers_in_0.Set(_size, _buffer); } @@ -907,13 +897,13 @@ class Matrix { /** * Returns/allocs and returns buffer of the given size to be used in CL operations as second input parameter. */ - static MatrixOpenCLBuffer* GetCLBufferInArg1(int _size) { - Ref> _buffer; + static OpenCLBuffer* GetCLBufferInArg1(int _size) { + Ref _buffer; _buffer = cl_buffers_in_1.GetByKey(_size, _buffer); if (!_buffer.IsSet()) { - _buffer = new MatrixOpenCLBuffer(_size, CL_MEM_READ_ONLY); + _buffer = new OpenCLBuffer(_size, CL_MEM_READ_ONLY); cl_buffers_in_1.Set(_size, _buffer); } @@ -923,13 +913,13 @@ class Matrix { /** * Returns/allocs and returns buffer of the given size to be used in CL operations as output parameter. 
*/ - static MatrixOpenCLBuffer* GetCLBufferOutArg(int _size) { - Ref> _buffer; + static OpenCLBuffer* GetCLBufferOutArg(int _size) { + Ref _buffer; _buffer = cl_buffers_out.GetByKey(_size, _buffer); if (!_buffer.IsSet()) { - _buffer = new MatrixOpenCLBuffer(_size, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); + _buffer = new OpenCLBuffer(_size, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); cl_buffers_out.Set(_size, _buffer); } @@ -1411,7 +1401,6 @@ class Matrix { static void MatMul(Matrix& source, Matrix& target, Matrix& output) { #ifdef MATRIX_USE_OPENCL - // MatMulCL(source, target, output); MatMulCL(source, target, output); return; #endif @@ -1433,6 +1422,8 @@ class Matrix { } } +#ifdef MATRIX_USE_OPENCL + /** * Performs matrix multiplication via OpenCL. Note that MATRIX_USE_OPENCL must be defined in order matrix to use this * method. @@ -1446,48 +1437,32 @@ class Matrix { unsigned int _cols_a = _source.GetRange(1); unsigned int _cols_b = _target.GetRange(1); - OpenCLBuffer* _in_1 = GetCLBufferInArg0(_rows_a * _cols_a) PTR_DEREF GetBuffer(); - OpenCLBuffer* _in_2 = GetCLBufferInArg1(_cols_a * _cols_b) PTR_DEREF GetBuffer(); - OpenCLBuffer* _out = GetCLBufferOutArg(_rows_a * _cols_b) PTR_DEREF GetBuffer(); + // Reusable 0-offset. + static ARRAY(unsigned int, _global_work_offset) = {0U, 0U}; - double _in_1_data[]; - double _in_2_data[]; - double _out_data[]; + ARRAY(unsigned int, _global_work_size) = {(unsigned int)_rows_a, (unsigned int)_cols_b}; - // ArrayResize(_out_data, _out PTR_DEREF GetSizeItems()); + // @todo Make local work size adapt to output matrix size. 
+ ARRAY(unsigned int, _local_work_size) = {1U, 1U}; - _source.GetRawArray(_in_1_data); - _target.GetRawArray(_in_2_data); - - _in_1 PTR_DEREF Write(_in_1_data); - _in_2 PTR_DEREF Write(_in_2_data); - - cl_program_matmul REF_DEREF SetArg(0, _in_1); - cl_program_matmul REF_DEREF SetArg(1, _in_2); - cl_program_matmul REF_DEREF SetArg(2, _out); + cl_program_matmul REF_DEREF SetArg(0, _source, OPENCL_MATRIX_ARG_IN_1); + cl_program_matmul REF_DEREF SetArg(1, _target, OPENCL_MATRIX_ARG_IN_2); + cl_program_matmul REF_DEREF SetArg(2, _output, OPENCL_MATRIX_ARG_OUT); cl_program_matmul REF_DEREF SetArg(3, (int)_rows_a); cl_program_matmul REF_DEREF SetArg(4, (int)_cols_a); cl_program_matmul REF_DEREF SetArg(5, (int)_cols_b); - ARRAY(unsigned int, _global_work_offset); - ARRAY(unsigned int, _global_work_size); - ARRAY(unsigned int, _local_work_size); - - ArrayPush(_global_work_offset, 0U); - ArrayPush(_global_work_offset, 0U); - ArrayPush(_global_work_size, (unsigned int)_rows_a); - ArrayPush(_global_work_size, (unsigned int)_cols_b); - ArrayPush(_local_work_size, 1U); - ArrayPush(_local_work_size, 1U); - - if (!cl_program_matmul REF_DEREF RunMany(2, _global_work_offset, _global_work_size, _local_work_size)) { + if (!cl_program_matmul REF_DEREF RunMany(2U, _global_work_offset, _global_work_size, _local_work_size)) { Alert("Error: Could not run Matrix::MatMulCL() over OpenCL!"); DebugBreak(); } - _out PTR_DEREF Read(_out_data); - + // Extracting data from the output buffer and filling the output matrix with it. + ARRAY(X, _out_data); + ArrayResize(_out_data, _rows_a * _cols_b); + _output.GetBuffer() PTR_DEREF Read(_out_data); _output.SetShape(_rows_a, _cols_b); + _output.FillFromArray(_out_data); } /** @@ -1535,27 +1510,7 @@ class Matrix { // _output.SetShape(num_outputs); } - static void MatMulCL_CPU(int Mdim, int Ndim, int Pdim, double& A[], double& B[], double& C[]) { - /* - for (int i = 0; i < Mdim; ++i) { - for (int j = 0; j < Pdim; ++j) { - C[i * Pdim + j] = 0.0; - for (int k = 0; k < Ndim; ++k) { - C[i * Pdim + j] += A[i * Ndim 
+ k] * B[k * Pdim + j]; - } - } - } - */ - - int i, j, k; - for (i = 0; i < Ndim; i++) { - for (j = 0; j < Mdim; j++) { - for (k = 0; k < Pdim; k++) { // C(i,j) = sum(over k) A(i,k) * B(k,j) - C[i * Pdim + j] += A[i * Ndim + k] * B[k * Pdim + j]; - } - } - } - } +#endif /** * Performs matrix multiplication. @@ -1703,6 +1658,12 @@ class Matrix { return result; } + /** + * Fills matrix from flattened data. Shape of the array must be initialized + * before deflattening. + */ + void Deflatten(const ARRAY_REF(X, array)) {} + /** * Initializer that generates tensors with a uniform distribution. */ @@ -2564,32 +2525,36 @@ class Matrix { } }; +#ifdef MATRIX_USE_OPENCL + #ifdef __MQL__ template -static int Matrix::uuid_counter = 0; +static unsigned long Matrix::version_counter = 0UL; template static Ref Matrix::cl_program_matmul; template static Ref Matrix::cl_program_matmul_single; template -static DictStruct>> Matrix::cl_buffers_in_0; +static DictStruct> Matrix::cl_buffers_in_0; template -static DictStruct>> Matrix::cl_buffers_in_1; +static DictStruct> Matrix::cl_buffers_in_1; template -static DictStruct>> Matrix::cl_buffers_out; +static DictStruct> Matrix::cl_buffers_out; #else template -static int Matrix::uuid_counter = 0; +static unsigned long Matrix::version_counter = 0UL; template static Ref Matrix::cl_program_matmul; template static Ref Matrix::cl_program_matmul_single; template -static DictStruct>> Matrix::cl_buffers_in_0; +static DictStruct> Matrix::cl_buffers_in_0; template -static DictStruct>> Matrix::cl_buffers_in_1; +static DictStruct> Matrix::cl_buffers_in_1; template -static DictStruct>> Matrix::cl_buffers_out; +static DictStruct> Matrix::cl_buffers_out; #endif #endif + +#endif \ No newline at end of file diff --git a/OpenCL.h b/OpenCL.h index b6fe7bc54..0ce41a1f1 100644 --- a/OpenCL.h +++ b/OpenCL.h @@ -9,6 +9,11 @@ // Forward declarations; class OpenCLProgram; +template +class Matrix; + +// Type of the matrix passed as argument to the OpenCLProgram. 
+enum ENUM_OPENCL_MATRIX_ARG { OPENCL_MATRIX_ARG_IN_1, OPENCL_MATRIX_ARG_IN_2, OPENCL_MATRIX_ARG_OUT }; /** * Memory buffer. @@ -20,8 +25,8 @@ class OpenCLBuffer : public Dynamic { // Allocated buffer size. int buffer_size; - // Buffer version. Should be incremented after each change. - long version; + // Version of the data. The same one that was passed to the Write() method. + unsigned long version; public: /** @@ -29,10 +34,15 @@ class OpenCLBuffer : public Dynamic { */ OpenCLBuffer(int _size, unsigned int _flags = CL_MEM_READ_WRITE); + /** + * Checks whether stored data version differs from the passed version. + */ + bool RequiresReupload(unsigned long _data_version) { return _data_version == ULONG_MAX || version != _data_version; } + /** * Writes/uploads data into buffer if needed. */ - void Write(const ARRAY_REF(double, _arr), long _arr_version = -1) { + void Write(const ARRAY_REF(double, _arr), unsigned long _data_version = ULONG_MAX) { if (ArraySize(_arr) > buffer_size) { Alert("Array passed is too large for the allocated buffer. Tries to pass ", ArraySize(_arr), " elements into buffer of size ", buffer_size, "."); @@ -40,15 +50,13 @@ class OpenCLBuffer : public Dynamic { return; } - // Do we need to reupload data into GPU? - if (_arr_version != -1 && _arr_version <= version) { - // Buffer has already up-to-date data. + if (!RequiresReupload(_data_version)) { return; } CLBufferWrite(buffer_handle, _arr); - version = (_arr_version != -1) ? _arr_version : (version + 1); + version = _data_version; } /** @@ -78,7 +86,7 @@ class OpenCLBuffer : public Dynamic { /** * Returns data version. */ - long GetVersion() { return version; } + unsigned long GetVersion() { return version; } /** * Returns handle to buffer. @@ -109,7 +117,7 @@ class OpenCLProgram : public Dynamic { int arg_handles[OPENCL_PROGRAM_MAX_ARGS]; // Version of argument data. Used to check if buffer needs to be reuploaded. 
- long arg_versions[OPENCL_PROGRAM_MAX_ARGS]; + unsigned long arg_versions[OPENCL_PROGRAM_MAX_ARGS]; public: /** @@ -140,6 +148,13 @@ class OpenCLProgram : public Dynamic { */ void SetArgLocalMem(int _index, unsigned long _mem_size) { CLSetKernelArgMemLocal(kernel_handle, _index, _mem_size); } + /** + * Checks whether given argument requires reupload of the buffer into GPU. + */ + bool RequiresReupload(int _index, OpenCLBuffer* _buffer, unsigned long _data_version) { + return _buffer PTR_DEREF GetHandle() != arg_handles[_index] || _data_version != arg_versions[_index]; + } + /** * Passes argument to the kernel. Will not set kernel argument if not needed. * @@ -157,10 +172,10 @@ class OpenCLProgram : public Dynamic { * which you can pass version of your data, so no reupload will take place if * your version isn't greater that the one already set in the buffer. */ - void SetArg(int _index, OpenCLBuffer* _buffer) { - if (_buffer PTR_DEREF GetHandle() == arg_handles[_index] && - _buffer PTR_DEREF GetVersion() >= arg_versions[_index]) { - // Already uploaded recent version. + void SetArg(int _index, OpenCLBuffer* _buffer, unsigned long _data_version) { + if (!RequiresReupload(_index, _buffer, _data_version)) { + // Buffer is already set via CLSetKernelArgMem() and argument's version + // is the same as _data_version. return; } @@ -174,115 +189,53 @@ class OpenCLProgram : public Dynamic { } /** - * Executes a single kernel. - */ - bool Run() { - if (!CLExecute(kernel_handle)) { - Alert("OpenCL error occured when tried to run kernel: ", GetLastError(), "!"); - return false; + * Passes matrix argument to the kernel. Will not upload data if not needed. + * + * The idea is to retrieve existing buffer that matches matrix size and its + * purpose. If such buffer already exists in the same version in the argument + * slot then no reupload will take place. 
+ */ + template + void SetArg(int _index, Matrix& _matrix, ENUM_OPENCL_MATRIX_ARG _matrix_type) { + unsigned long _matrix_data_version = _matrix.GetVersion(); + OpenCLBuffer* _buffer = nullptr; + + switch (_matrix_type) { + case OPENCL_MATRIX_ARG_IN_1: + _buffer = GetCLBufferInArg0(_matrix.GetSize()); + break; + + case OPENCL_MATRIX_ARG_IN_2: + _buffer = GetCLBufferInArg1(_matrix.GetSize()); + break; + + case OPENCL_MATRIX_ARG_OUT: + _buffer = GetCLBufferOutArg(_matrix.GetSize()); + break; } - return true; - } + if (RequiresReupload(_index, _buffer, _matrix_data_version)) { + // Flattening matrix data in order to upload it into GPU. + double _flattened_data[]; + _matrix.GetRawArray(_flattened_data); - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a) { - SetArg(0, a); - return Run(); - } + _buffer PTR_DEREF Write(_flattened_data) - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b) { - SetArg(0, a); - SetArg(1, b); - return Run(); - } - - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b, C c) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - return Run(); - } - - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b, C c, D d) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - return Run(); - } - - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b, C c, D d, E e) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - return Run(); - } - - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b, C c, D d, E e, F f) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - return Run(); + // Do we need to reupload the data? 
+ SetArg(_index, _buffer, _matrix_data_version); + } } /** - * Executes a single kernel. Allows passing arugments to kernel. + * Executes a single kernel. */ - template - bool Run(A a, B b, C c, D d, E e, F f, G g) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - SetArg(6, g); - return Run(); - } + bool Run() { + if (!CLExecute(kernel_handle)) { + Alert("OpenCL error occured when tried to run kernel: ", GetLastError(), "!"); + return false; + } - /** - * Executes a single kernel. Allows passing arugments to kernel. - */ - template - bool Run(A a, B b, C c, D d, E e, F f, G g, H h) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - SetArg(6, g); - SetArg(7, h); - return Run(); + return true; } /** @@ -298,121 +251,6 @@ class OpenCLProgram : public Dynamic { return true; } - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a) { - SetArg(0, a); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b) { - SetArg(0, a); - SetArg(1, b); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. 
- */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c, D d) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c, D d, E e) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. 
- */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c, D d, E e, F f) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c, D d, E e, F f, G g) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - SetArg(6, g); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - - /** - * Executes multiple kernels where work is subdivided among kernels. Allows passing arugments to kernels. - */ - template - bool RunMany(unsigned int _dimension, const ARRAY_REF(unsigned int, _global_work_offset), - const ARRAY_REF(unsigned int, _global_work_size), const ARRAY_REF(unsigned int, _local_work_size), A a, - B b, C c, D d, E e, F f, G g, H h) { - SetArg(0, a); - SetArg(1, b); - SetArg(2, c); - SetArg(3, d); - SetArg(4, e); - SetArg(5, f); - SetArg(6, g); - SetArg(7, h); - return RunMany(_dimension, _global_work_offset, _global_work_size, _local_work_size); - } - /** * Returns handle to OpenCL program. */ @@ -542,5 +380,7 @@ OpenCLBuffer::OpenCLBuffer(int _size, unsigned int _flags) { DebugBreak(); } buffer_size = _size; - version = 0; + // Ensuring there won't be initial version clash when checking if buffer data + // need to be reuploaded. 
+ version = ULONG_MAX; } diff --git a/tests/OpenCLTest.mq5 b/tests/OpenCLTest.mq5 index c26cf3153..0e0d2ef50 100644 --- a/tests/OpenCLTest.mq5 +++ b/tests/OpenCLTest.mq5 @@ -42,7 +42,9 @@ int OnInit() { Ref buffer = OpenCL::Alloc(1 /* 1 double */, CL_MEM_READ_WRITE); - if (!program REF_DEREF Run(buffer.Ptr())) { + program REF_DEREF SetArg(0, buffer.Ptr(), ULONG_MAX); + + if (!program REF_DEREF Run()) { Alert("Error running program!"); } @@ -57,6 +59,7 @@ int OnInit() { Print("in2 shape: ", in2 PTR_DEREF GetRange(0), " x ", in2 PTR_DEREF GetRange(1)); out = in1 PTR_DEREF MatMul(in2); Print("out shape: ", out PTR_DEREF GetRange(0), " x ", out PTR_DEREF GetRange(1)); + Print("out data: ", out PTR_DEREF ToString()); delete in1; delete in2;