1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143 | #ifndef B3_GPU_SAP_BROADPHASE_H
#define B3_GPU_SAP_BROADPHASE_H
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
class b3Vector3;
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
#include "b3SapAabb.h"
#include "Bullet3Common/shared/b3Int2.h"
#include "b3GpuBroadphaseInterface.h"
class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
{
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_queue;
cl_kernel m_flipFloatKernel;
cl_kernel m_scatterKernel;
cl_kernel m_copyAabbsKernel;
cl_kernel m_sapKernel;
cl_kernel m_sap2Kernel;
cl_kernel m_prepareSumVarianceKernel;
class b3RadixSort32CL* m_sorter;
///test for 3d SAP
b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;
b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
b3OpenCLArray<int> m_addedCountGPU;
b3OpenCLArray<int> m_removedCountGPU;
int m_currentBuffer;
public:
b3OpenCLArray<int> m_pairCount;
b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()<--- Function in derived class
{
return m_allAabbsGPU;
}
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()<--- Function in derived class
{
return m_allAabbsCPU;
}
b3OpenCLArray<b3Vector3> m_sum;
b3OpenCLArray<b3Vector3> m_sum2;
b3OpenCLArray<b3Vector3> m_dst;
b3OpenCLArray<int> m_smallAabbsMappingGPU;
b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
b3OpenCLArray<int> m_largeAabbsMappingGPU;
b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
b3OpenCLArray<b3Int4> m_overlappingPairs;
//temporary gpu work memory
b3OpenCLArray<b3SortData> m_gpuSmallSortData;
b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
class b3PrefixScanFloat4CL* m_prefixScanFloat4;
enum b3GpuSapKernelType
{
B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU = 1,
B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
B3_GPU_SAP_KERNEL_ORIGINAL,
B3_GPU_SAP_KERNEL_BARRIER,
B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
};
b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType = B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
virtual ~b3GpuSapBroadphase();
static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx, cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
}
static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx, cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
}
static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx, cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_ORIGINAL);
}
static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx, cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BARRIER);
}
static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx, cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
}
virtual void calculateOverlappingPairs(int maxPairs);<--- Function in derived class
virtual void calculateOverlappingPairsHost(int maxPairs);<--- Function in derived class
void reset();
void init3dSap();
virtual void calculateOverlappingPairsHostIncremental3Sap();
virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);<--- Function in derived class
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);<--- Function in derived class
//call writeAabbsToGpu after done making all changes (createProxy etc)
virtual void writeAabbsToGpu();<--- Function in derived class
virtual cl_mem getAabbBufferWS();<--- Function in derived class
virtual int getNumOverlap();<--- Function in derived class
virtual cl_mem getOverlappingPairBuffer();<--- Function in derived class
virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();<--- Function in derived class
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();<--- Function in derived class
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();<--- Function in derived class
};
#endif //B3_GPU_SAP_BROADPHASE_H
|