From 4795d11cf63e5f0a23b1b2f06f92ad7661c5a5da Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 18 Jul 2023 23:25:00 +0200 Subject: [PATCH 1/3] Stop ECAL unpacking on GPU for the rest of the block if a bad block is detected. --- .../EcalRawToDigi/plugins/UnpackGPU.cu | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu index b80831e104f84..f4cc3175b1aea 100644 --- a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu +++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu @@ -272,32 +272,53 @@ namespace ecal { // get the next channel coordinates uint32_t nchannels = (block_length - 1) / 3; + bool bad_block = false; + __shared__ uint32_t ch_with_bad_block; // 1 threads per channel in this block for (uint32_t ich = 0; ich < nchannels; ich += NTHREADS) { auto const i_to_access = ich + threadIdx.x; - // threads outside of the range -> leave the loop - if (i_to_access >= nchannels) - break; + if (i_to_access == 0) { + ch_with_bad_block = 999999; + } - // inc the channel's counter and get the pos where to store - auto const wdata = current_tower_block[1 + i_to_access * 3]; - uint8_t const stripid = wdata & 0x7; - uint8_t const xtalid = (wdata >> 4) & 0x7; + uint64_t wdata; + uint8_t stripid; + uint8_t xtalid; + + // threads must be inside the range (no break here because of __syncthreads() afterwards) + if (i_to_access < nchannels) { + // inc the channel's counter and get the pos where to store + wdata = current_tower_block[1 + i_to_access * 3]; + stripid = wdata & 0x7; + xtalid = (wdata >> 4) & 0x7; + + // check if the stripid and xtalid are in the allowed range and if not skip the rest of the block + if (stripid < ElectronicsIdGPU::MIN_STRIPID || stripid > ElectronicsIdGPU::MAX_STRIPID || + xtalid < ElectronicsIdGPU::MIN_XTALID || xtalid > ElectronicsIdGPU::MAX_XTALID) { + bad_block = true; + } + if (i_to_access > 0) { + // check if the stripid has increased or that the xtalid has increased from the previous data word. If not something is wrong and the rest of the block is skipped. + auto const prev_i_to_access = ich + threadIdx.x - 1; + auto const prevwdata = current_tower_block[1 + prev_i_to_access * 3]; + uint8_t const laststripid = prevwdata & 0x7; + uint8_t const lastxtalid = (prevwdata >> 4) & 0x7; + if ((stripid == laststripid && xtalid <= lastxtalid) || (stripid < laststripid)) { + bad_block = true; + } + } + } - // check if the stripid and xtalid are in the allowed range and if not skip the rest of the block - if (stripid < ElectronicsIdGPU::MIN_STRIPID || stripid > ElectronicsIdGPU::MAX_STRIPID || - xtalid < ElectronicsIdGPU::MIN_XTALID || xtalid > ElectronicsIdGPU::MAX_XTALID) { - break; + // check if this thread has the lowest bad block + if (bad_block) { + atomicMin(&ch_with_bad_block, i_to_access); } - if (ich > 0 || threadIdx.x > 0) { - // check if the stripid has increased or that the xtalid has increased from the previous data word. If not something is wrong and the rest of the block is skipped. - auto const prev_i_to_access = ich + threadIdx.x - 1; - auto const prevwdata = current_tower_block[1 + prev_i_to_access * 3]; - uint8_t const laststripid = prevwdata & 0x7; - uint8_t const lastxtalid = (prevwdata >> 4) & 0x7; - if ((stripid == laststripid && xtalid <= lastxtalid) || (stripid < laststripid)) { - break; - } + + __syncthreads(); + + // threads outside of the range or bad block detected in this thread or one working on a lower block -> leave the loop + if (i_to_access >= nchannels || i_to_access >= ch_with_bad_block) { + break; } ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid}; From cb32efec299ca0db4aee2ca679dbaa0b524c1962 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 25 Jul 2023 13:13:10 +0200 Subject: [PATCH 2/3] Additional syncthreads and replace break with continue. --- EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu index f4cc3175b1aea..78314f7dda1f5 100644 --- a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu +++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu @@ -278,7 +278,7 @@ namespace ecal { for (uint32_t ich = 0; ich < nchannels; ich += NTHREADS) { auto const i_to_access = ich + threadIdx.x; if (i_to_access == 0) { - ch_with_bad_block = 999999; + ch_with_bad_block = std::numeric_limits::max(); } uint64_t wdata; @@ -286,7 +286,7 @@ namespace ecal { uint8_t xtalid; // threads must be inside the range (no break here because of __syncthreads() afterwards) - if (i_to_access < nchannels) { + if (i_to_access < nchannels && i_to_access < ch_with_bad_block) { // inc the channel's counter and get the pos where to store wdata = current_tower_block[1 + i_to_access * 3]; stripid = wdata & 0x7; @@ -299,7 +299,7 @@ namespace ecal { } if (i_to_access > 0) { // check if the stripid has increased or that the xtalid has increased from the previous data word. If not something is wrong and the rest of the block is skipped. - auto const prev_i_to_access = ich + threadIdx.x - 1; + auto const prev_i_to_access = i_to_access - 1; auto const prevwdata = current_tower_block[1 + prev_i_to_access * 3]; uint8_t const laststripid = prevwdata & 0x7; uint8_t const lastxtalid = (prevwdata >> 4) & 0x7; @@ -309,16 +309,18 @@ namespace ecal { } } + __syncthreads(); + // check if this thread has the lowest bad block - if (bad_block) { + if (bad_block && i_to_access < ch_with_bad_block) { atomicMin(&ch_with_bad_block, i_to_access); } __syncthreads(); - // threads outside of the range or bad block detected in this thread or one working on a lower block -> leave the loop + // threads outside of the range or bad block detected in this thread or one working on a lower block -> stop this loop iteration here if (i_to_access >= nchannels || i_to_access >= ch_with_bad_block) { - break; + continue; } ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid}; From 55fc14bb46b06a8821be066ec00d2e1e529fce1c Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 26 Jul 2023 10:37:42 +0200 Subject: [PATCH 3/3] Put the syncthreads at the right place. --- EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu index 78314f7dda1f5..2ba5c508c588b 100644 --- a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu +++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu @@ -281,6 +281,9 @@ namespace ecal { ch_with_bad_block = std::numeric_limits::max(); } + // make sure the shared memory is initialised for all threads + __syncthreads(); + uint64_t wdata; uint8_t stripid; uint8_t xtalid; @@ -309,13 +312,12 @@ namespace ecal { } } - __syncthreads(); - // check if this thread has the lowest bad block if (bad_block && i_to_access < ch_with_bad_block) { atomicMin(&ch_with_bad_block, i_to_access); } + // make sure that all threads that have to have set the ch_with_bad_block shared memory __syncthreads(); // threads outside of the range or bad block detected in this thread or one working on a lower block -> stop this loop iteration here