From df36c85f26194b34e27d1fd24233e41a059501fa Mon Sep 17 00:00:00 2001 From: Stefan Weil <sw@weilnetz.de> Date: Fri, 19 May 2017 10:11:14 +0200 Subject: [PATCH] opencl: Remove more unused code Signed-off-by: Stefan Weil <sw@weilnetz.de> --- opencl/oclkernels.h | 111 --------------------------------------- opencl/openclwrapper.cpp | 38 -------------- 2 files changed, 149 deletions(-) diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index a36b664e3f..1788baa9f9 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -75,38 +75,6 @@ KERNEL( }\n ) -KERNEL( -\n__kernel void pixAND(__global int *dword, __global int *sword, __global int *outword, - const int wpl, const int h) -{ - const unsigned int row = get_global_id(1); - const unsigned int col = get_global_id(0); - const unsigned int pos = row * wpl + col; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - *(outword + pos) = *(dword + pos) & (*(sword + pos)); -}\n -) - -KERNEL( -\n__kernel void pixOR(__global int *dword, __global int *sword, __global int *outword, - const int wpl, const int h) -{ - const unsigned int row = get_global_id(1); - const unsigned int col = get_global_id(0); - const unsigned int pos = row * wpl + col; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - *(outword + pos) = *(dword + pos) | (*(sword + pos)); -}\n -) - KERNEL( \n__kernel void morphoDilateHor_5x5(__global int *sword,__global int *dword, const int wpl, const int h) @@ -885,36 +853,6 @@ void kernel_HistogramRectOneChannel( } ) - -KERNEL( -// unused -\n __attribute__((reqd_work_group_size(256, 1, 1))) -\n __kernel -\n void kernel_HistogramRectAllChannels_Grey( -\n __global const uchar* data, -\n uint numPixels, -\n __global uint *histBuffer) { // each wg will write HIST_SIZE*NUM_CHANNELS into this result; cpu will accumulate across wg's -\n -\n /* declare variables */ -\n -\n // work indices -\n size_t groupId = get_group_id(0); -\n size_t localId = get_local_id(0); // 0 -> 256-1 -\n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1 -\n uint numThreads = get_global_size(0); -\n -\n /* accumulate in global memory */ -\n for ( uint pc = get_global_id(0); pc < numPixels; pc += get_global_size(0) ) { -\n uchar value = data[ pc ]; -\n int idx = value * get_global_size(0) + get_global_id(0); -\n histBuffer[ idx ]++; -\n -\n } -\n -\n } // kernel_HistogramRectAllChannels_Grey - -) - // HistogramRect Kernel: Reduction // only supports 4 channels // each work group handles a single channel of a single histogram bin @@ -1000,55 +938,6 @@ void kernel_HistogramRectOneChannelReduction( } // kernel_HistogramRectOneChannelReduction ) - -KERNEL( -// unused - // each work group (x256) handles a histogram bin -\n __attribute__((reqd_work_group_size(256, 1, 1))) -\n __kernel -\n void kernel_HistogramRectAllChannelsReduction_Grey( -\n int n, // pixel redundancy that needs to be accumulated -\n __global uint *histBuffer, -\n __global uint* histResult) { // each wg accumulates 1 bin -\n -\n /* declare variables */ -\n -\n // work indices -\n size_t groupId = get_group_id(0); -\n size_t localId = get_local_id(0); // 0 -> 256-1 -\n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1 -\n uint numThreads = get_global_size(0); -\n unsigned int hist = 0; -\n -\n /* accumulate in global memory */ -\n for ( uint p = 0; p < n; p+=GROUP_SIZE) { -\n hist += histBuffer[ (get_group_id(0)*n + p)]; -\n } -\n -\n /* reduction in local memory */ -\n // populate local memory -\n __local unsigned int localHist[GROUP_SIZE]; - -\n localHist[localId] = hist; -\n barrier(CLK_LOCAL_MEM_FENCE); -\n -\n for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) { -\n if (localId < stride) { -\n hist = localHist[ (localId+stride)]; -\n } -\n barrier(CLK_LOCAL_MEM_FENCE); -\n if (localId < stride) { -\n localHist[ localId] += hist; -\n } -\n barrier(CLK_LOCAL_MEM_FENCE); -\n } -\n -\n if (localId == 0) -\n histResult[get_group_id(0)] = localHist[0]; -\n -\n } // kernel_HistogramRectAllChannelsReduction_Grey -) - // ThresholdRectToPix Kernel // only supports 4 channels // imageData is input image (24-bits/pixel) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 6f39bdea7e..88483f6e90 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -1691,44 +1691,6 @@ static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) return status; } -//pix OR operation: outbuffer = buffer1 | buffer2 -static cl_int -pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer) -{ - cl_int status; - size_t globalThreads[2]; - int gsize; - size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y}; - - gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixOR", &status ); - CHECK_OPENCL(status, "clCreateKernel pixOR"); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &buffer1); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &buffer2); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(cl_mem), - &outbuffer); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, - nullptr, globalThreads, localThreads, 0, - nullptr, nullptr); - - return status; -} - //output = buffer1 & ~(buffer2) static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,