diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index 3998e02545..f64c3755f5 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -1045,19 +1045,19 @@ KERNEL( // imageData is input image (24-bits/pixel) // pix is output image (1-bit/pixel) KERNEL( -\n#define CHAR_VEC_WIDTH 8 \n +\n#define CHAR_VEC_WIDTH 4 \n \n#define PIXELS_PER_WORD 32 \n \n#define PIXELS_PER_BURST 8 \n \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n typedef union { uchar s[PIXELS_PER_BURST*NUM_CHANNELS]; - uchar8 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH]; + uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH]; } charVec; __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix( - __global const uchar8 *imageData, + __global const uchar4 *imageData, int height, int width, int wpl, // words per line @@ -1066,6 +1066,7 @@ void kernel_ThresholdRectToPix( __global int *pix) { // declare variables + uint pad = PIXELS_PER_WORD * wpl - width;//number of padding bits at the end of each output line int pThresholds[NUM_CHANNELS]; int pHi_Values[NUM_CHANNELS]; for ( int i = 0; i < NUM_CHANNELS; i++) { @@ -1076,14 +1077,14 @@ void kernel_ThresholdRectToPix( // for each word (32 pixels) in output image for ( uint w = get_global_id(0); w < wpl*height; w += get_global_size(0) ) { unsigned int word = 0; // all bits start at zero - + //decrease the pixel index for the padding at the end of each output line (=number of lines * padding) + uint pxIdxOffset = ( w / wpl) * pad;// = ( ( PIXELS_PER_WORD * w) / ( width + pad)) * pad; // for each burst in word for ( int b = 0; b < BURSTS_PER_WORD; b++) { - // load burst charVec pixels; for ( int i = 0; i < (PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH; i++ ) { - pixels.v[i] = imageData[w*(BURSTS_PER_WORD*(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH) + b*((PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH) + i]; + pixels.v[i] = imageData[w*(BURSTS_PER_WORD*(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH) + b*((PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH) + i - pxIdxOffset]; } // for each pixel in burst @@ -1091,7 +1092,7 @@ void kernel_ThresholdRectToPix( for ( int c = 0; c < NUM_CHANNELS; c++) { unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c]; if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) { - word |= (0x80000000 >> ((b*PIXELS_PER_BURST+p)&31)); + word |= (((uint)0x80000000) >> ((b*PIXELS_PER_BURST+p)&31)); } } }