1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

#ifndef B3_LAUNCHER_CL_H
#define B3_LAUNCHER_CL_H

#include "b3BufferInfoCL.h"
#include "Bullet3Common/b3MinMax.h"
#include "b3OpenCLArray.h"
#include <stdio.h>
#include <string.h>

// Defined unconditionally by this header. Nothing in this header tests it, so
// whatever depends on it lives in other files.
#define B3_DEBUG_SERIALIZE_CL

#ifdef _WIN32
// C4996 is MSVC's warning for use of functions it marks as deprecated.
#pragma warning(disable : 4996)
#endif
// Capacity in bytes of b3KernelArgData::m_argData, and the upper bound that
// b3LauncherCL::setConst() asserts on sizeof(T).
#define B3_CL_MAX_ARG_SIZE 16
/// Record describing a single kernel argument, as stored by b3LauncherCL in
/// its m_kernelArguments array.
///
/// The payload is an anonymous union: either an OpenCL buffer handle or up to
/// B3_CL_MAX_ARG_SIZE bytes of by-value argument data. m_isBuffer tells the
/// reader which member is meaningful.
B3_ATTRIBUTE_ALIGNED16(struct)
b3KernelArgData
{
	int m_isBuffer;        // b3LauncherCL::setConst() stores 0 here (by-value argument)
	int m_argIndex;        // kernel argument index this record was captured for
	int m_argSizeInBytes;  // size of the argument; setConst() stores sizeof(T)
	int m_unusedPadding;   // not read or written in this header; pads the ints to 16 bytes
	union {
		cl_mem m_clBuffer;                            // buffer handle
		unsigned char m_argData[B3_CL_MAX_ARG_SIZE];  // raw bytes of a by-value argument
	};
};

/// Binds arguments to an OpenCL kernel and enqueues it on a command queue.
///
/// Arguments are bound in call order: each setConst() call uses the current
/// argument index and then advances it. When serialization is enabled, every
/// by-value argument is additionally recorded as a b3KernelArgData so the
/// argument list can be written out later by the serialize* methods.
///
/// Only setConst(), launch1D(), launch2D() and the trivial accessors are
/// defined in this header; the remaining methods are defined elsewhere.
class b3LauncherCL
{
	cl_command_queue m_commandQueue;  // queue used by launch2D()
	cl_kernel m_kernel;               // kernel whose arguments are set and which is launched
	int m_idx;                        // index of the next kernel argument; advanced by setConst()

	b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;  // arguments recorded while serialization is on
	int m_serializationSizeInBytes;                           // running total; setConst() adds sizeof(b3KernelArgData) per record
	bool m_enableSerialization;                               // toggled by enableSerialization()

	const char* m_name;  // presumably the name passed to the constructor - TODO confirm in the .cpp

	/// Smallest non-zero multiple of localSize that covers numThreads items.
	///
	/// A negative numThreads is treated as 0 so it cannot wrap to a huge
	/// size_t. Zero threads still yields exactly one work-group.
	/// localSize must be >= 1 (launch2D() guarantees this).
	static size_t paddedGlobalSize(int numThreads, size_t localSize)
	{
		const size_t threads = (numThreads > 0) ? (size_t)numThreads : (size_t)0;
		size_t numGroups = (threads + localSize - 1) / localSize;  // ceil(threads / localSize)
		if (numGroups == 0)
		{
			numGroups = 1;
		}
		return numGroups * localSize;
	}

public:
	b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;

	/// @param queue   command queue for launches
	/// @param kernel  kernel to configure and launch
	/// @param name    label for this launcher
	b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);

	virtual ~b3LauncherCL();

	/// Binds a single OpenCL buffer as a kernel argument (defined elsewhere).
	void setBuffer(cl_mem clBuffer);

	/// Binds n buffers described by buffInfo[0..n-1] (defined elsewhere).
	void setBuffers(b3BufferInfoCL* buffInfo, int n);

	/// @return the serialization byte count accumulated so far.
	int getSerializationBufferSize() const
	{
		return m_serializationSizeInBytes;
	}

	int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);

	// Not declared 'inline': the definition is not in this header, and an
	// inline function must be defined in every translation unit that uses it.
	int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);

	int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);

	/// @return number of argument records currently held.
	int getNumArguments() const
	{
		return m_kernelArguments.size();
	}

	/// @return a copy of the recorded argument at index. The index is not
	///         range-checked here.
	b3KernelArgData getArgument(int index)
	{
		return m_kernelArguments[index];
	}

	void serializeToFile(const char* fileName, int numWorkItems);

	/// Binds a by-value argument at the current argument index and advances
	/// the index.
	///
	/// sizeof(T) is expected to be at most B3_CL_MAX_ARG_SIZE (asserted).
	/// When serialization is enabled, a fully zero-initialised record holding
	/// a byte copy of the value is appended to the recorded arguments. If the
	/// assertion is compiled out and T is too large, only the first
	/// B3_CL_MAX_ARG_SIZE bytes are recorded and the recorded size is clamped
	/// to match, so the record never claims more bytes than it stores. The
	/// kernel itself always receives the full sizeof(T) bytes.
	template <typename T>
	inline void setConst(const T& consts)
	{
		int sz = sizeof(T);
		b3Assert(sz <= B3_CL_MAX_ARG_SIZE);

		if (m_enableSerialization)
		{
			b3KernelArgData kernelArg;
			// Zero everything first so padding and unused payload bytes are
			// deterministic when the record is later written out.
			memset(&kernelArg, 0, sizeof(kernelArg));

			const int storedBytes = (sz <= B3_CL_MAX_ARG_SIZE) ? sz : B3_CL_MAX_ARG_SIZE;
			kernelArg.m_argIndex = m_idx;
			kernelArg.m_isBuffer = 0;
			kernelArg.m_argSizeInBytes = storedBytes;
			// Byte copy rather than assignment through a casted pointer: the
			// destination is raw storage, not a constructed T.
			memcpy(kernelArg.m_argData, &consts, (size_t)storedBytes);

			m_kernelArguments.push_back(kernelArg);
			m_serializationSizeInBytes += sizeof(b3KernelArgData);
		}

		cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
		b3Assert(status == CL_SUCCESS);
		(void)status;  // avoids an unused-variable warning if b3Assert expands to nothing
	}

	/// Launches the kernel over numThreads work items with a work-group size
	/// of localSize (64 by default). Forwards to launch2D() with a Y extent
	/// and Y work-group size of 1.
	inline void launch1D(int numThreads, int localSize = 64)
	{
		launch2D(numThreads, 1, localSize, 1);
	}

	/// Enqueues the kernel as a 2-dimensional range.
	///
	/// Each global extent is rounded up to a multiple of the matching local
	/// size, with a minimum of one work-group per dimension, so the kernel
	/// may run on more work items than requested. Local sizes must be
	/// positive (asserted); a non-positive value falls back to 1 instead of
	/// dividing by zero. A failing enqueue is reported on stdout and asserted.
	inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
	{
		b3Assert(localSizeX > 0 && localSizeY > 0);

		size_t lRange[3] = {1, 1, 1};
		lRange[0] = (localSizeX > 0) ? (size_t)localSizeX : (size_t)1;
		lRange[1] = (localSizeY > 0) ? (size_t)localSizeY : (size_t)1;

		size_t gRange[3] = {1, 1, 1};
		gRange[0] = paddedGlobalSize(numThreadsX, lRange[0]);
		gRange[1] = paddedGlobalSize(numThreadsY, lRange[1]);

		// work_dim = 2, no global offset, no wait list, no returned event.
		cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
											   m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
		if (status != CL_SUCCESS)
		{
			printf("Error: OpenCL status = %d\n", status);
		}
		b3Assert(status == CL_SUCCESS);
	}

	/// Turns recording of by-value arguments in setConst() on or off.
	void enableSerialization(bool serialize)
	{
		m_enableSerialization = serialize;
	}
};

#endif  //B3_LAUNCHER_CL_H