-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #172 from ghugo83/clusterizer
[alpaka] Port Clusterizer to Alpaka
- Loading branch information
Showing
7 changed files
with
1,087 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
#ifndef Geometry_TrackerGeometryBuilder_phase1PixelTopology_h | ||
#define Geometry_TrackerGeometryBuilder_phase1PixelTopology_h | ||
|
||
#include <cstdint> | ||
#include <array> | ||
|
||
namespace phase1PixelTopology { | ||
|
||
constexpr uint16_t numRowsInRoc = 80; | ||
constexpr uint16_t numColsInRoc = 52; | ||
constexpr uint16_t lastRowInRoc = numRowsInRoc - 1; | ||
constexpr uint16_t lastColInRoc = numColsInRoc - 1; | ||
|
||
constexpr uint16_t numRowsInModule = 2 * numRowsInRoc; | ||
constexpr uint16_t numColsInModule = 8 * numColsInRoc; | ||
constexpr uint16_t lastRowInModule = numRowsInModule - 1; | ||
constexpr uint16_t lastColInModule = numColsInModule - 1; | ||
|
||
constexpr int16_t xOffset = -81; | ||
constexpr int16_t yOffset = -54 * 4; | ||
|
||
constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule) * uint32_t(numColsInModule); | ||
|
||
constexpr uint32_t numberOfModules = 1856; | ||
constexpr uint32_t numberOfLayers = 10; | ||
constexpr uint32_t layerStart[numberOfLayers + 1] = {0, | ||
96, | ||
320, | ||
672, // barrel | ||
1184, | ||
1296, | ||
1408, // positive endcap | ||
1520, | ||
1632, | ||
1744, // negative endcap | ||
numberOfModules}; | ||
constexpr char const* layerName[numberOfLayers] = { | ||
"BL1", | ||
"BL2", | ||
"BL3", | ||
"BL4", // barrel | ||
"E+1", | ||
"E+2", | ||
"E+3", // positive endcap | ||
"E-1", | ||
"E-2", | ||
"E-3" // negative endcap | ||
}; | ||
|
||
constexpr uint32_t numberOfModulesInBarrel = 1184; | ||
constexpr uint32_t numberOfLaddersInBarrel = numberOfModulesInBarrel / 8; | ||
|
||
template <class Function, std::size_t... Indices> | ||
constexpr auto map_to_array_helper(Function f, std::index_sequence<Indices...>) | ||
-> std::array<typename std::result_of<Function(std::size_t)>::type, sizeof...(Indices)> { | ||
return {{f(Indices)...}}; | ||
} | ||
|
||
template <int N, class Function> | ||
constexpr auto map_to_array(Function f) -> std::array<typename std::result_of<Function(std::size_t)>::type, N> { | ||
return map_to_array_helper(f, std::make_index_sequence<N>{}); | ||
} | ||
|
||
constexpr uint32_t findMaxModuleStride() { | ||
bool go = true; | ||
int n = 2; | ||
while (go) { | ||
for (uint8_t i = 1; i < 11; ++i) { | ||
if (layerStart[i] % n != 0) { | ||
go = false; | ||
break; | ||
} | ||
} | ||
if (!go) | ||
break; | ||
n *= 2; | ||
} | ||
return n / 2; | ||
} | ||
|
||
constexpr uint32_t maxModuleStride = findMaxModuleStride(); | ||
|
||
constexpr uint8_t findLayer(uint32_t detId) { | ||
for (uint8_t i = 0; i < 11; ++i) | ||
if (detId < layerStart[i + 1]) | ||
return i; | ||
return 11; | ||
} | ||
|
||
constexpr uint8_t findLayerFromCompact(uint32_t detId) { | ||
detId *= maxModuleStride; | ||
for (uint8_t i = 0; i < 11; ++i) | ||
if (detId < layerStart[i + 1]) | ||
return i; | ||
return 11; | ||
} | ||
|
||
constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride; | ||
constexpr std::array<uint8_t, layerIndexSize> layer = map_to_array<layerIndexSize>(findLayerFromCompact); | ||
|
||
constexpr bool validateLayerIndex() { | ||
bool res = true; | ||
for (auto i = 0U; i < numberOfModules; ++i) { | ||
auto j = i / maxModuleStride; | ||
res &= (layer[j] < 10); | ||
res &= (i >= layerStart[layer[j]]); | ||
res &= (i < layerStart[layer[j] + 1]); | ||
} | ||
return res; | ||
} | ||
|
||
static_assert(validateLayerIndex(), "layer from detIndex algo is buggy"); | ||
|
||
// this is for the ROC n<512 (upgrade 1024) | ||
constexpr inline uint16_t divu52(uint16_t n) { | ||
n = n >> 2; | ||
uint16_t q = (n >> 1) + (n >> 4); | ||
q = q + (q >> 4) + (q >> 5); | ||
q = q >> 3; | ||
uint16_t r = n - q * 13; | ||
return q + ((r + 3) >> 4); | ||
} | ||
|
||
constexpr inline bool isEdgeX(uint16_t px) { return (px == 0) | (px == lastRowInModule); } | ||
|
||
constexpr inline bool isEdgeY(uint16_t py) { return (py == 0) | (py == lastColInModule); } | ||
|
||
constexpr inline uint16_t toRocX(uint16_t px) { return (px < numRowsInRoc) ? px : px - numRowsInRoc; } | ||
|
||
constexpr inline uint16_t toRocY(uint16_t py) { | ||
auto roc = divu52(py); | ||
return py - 52 * roc; | ||
} | ||
|
||
constexpr inline bool isBigPixX(uint16_t px) { return (px == 79) | (px == 80); } | ||
|
||
constexpr inline bool isBigPixY(uint16_t py) { | ||
auto ly = toRocY(py); | ||
return (ly == 0) | (ly == lastColInRoc); | ||
} | ||
|
||
constexpr inline uint16_t localX(uint16_t px) { | ||
auto shift = 0; | ||
if (px > lastRowInRoc) | ||
shift += 1; | ||
if (px > numRowsInRoc) | ||
shift += 1; | ||
return px + shift; | ||
} | ||
|
||
constexpr inline uint16_t localY(uint16_t py) { | ||
auto roc = divu52(py); | ||
auto shift = 2 * roc; | ||
auto yInRoc = py - 52 * roc; | ||
if (yInRoc > 0) | ||
shift += 1; | ||
return py + shift; | ||
} | ||
|
||
//FIXME move it elsewhere? | ||
struct AverageGeometry { | ||
static constexpr auto numberOfLaddersInBarrel = phase1PixelTopology::numberOfLaddersInBarrel; | ||
float ladderZ[numberOfLaddersInBarrel]; | ||
float ladderX[numberOfLaddersInBarrel]; | ||
float ladderY[numberOfLaddersInBarrel]; | ||
float ladderR[numberOfLaddersInBarrel]; | ||
float ladderMinZ[numberOfLaddersInBarrel]; | ||
float ladderMaxZ[numberOfLaddersInBarrel]; | ||
float endCapZ[2]; // just for pos and neg Layer1 | ||
}; | ||
|
||
} // namespace phase1PixelTopology | ||
|
||
#endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h |
Empty file.
147 changes: 147 additions & 0 deletions
147
src/alpaka/plugin-SiPixelClusterizer/gpuClusterChargeCut.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h | ||
#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h | ||
|
||
#include <cstdint> | ||
#include <cstdio> | ||
|
||
#include "AlpakaCore/alpakaConfig.h" | ||
#include "AlpakaCore/alpakaWorkDivHelper.h" | ||
#include "AlpakaCore/prefixScan.h" | ||
#include "AlpakaDataFormats/gpuClusteringConstants.h" | ||
|
||
namespace gpuClustering { | ||
|
||
struct clusterChargeCut { | ||
template <typename T_Acc> | ||
ALPAKA_FN_ACC void operator()( | ||
const T_Acc& acc, | ||
uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster) | ||
uint16_t const* __restrict__ adc, // charge of each pixel | ||
uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module | ||
uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module | ||
uint32_t const* __restrict__ moduleId, // module id of each module | ||
int32_t* __restrict__ clusterId, // modified: cluster id of each pixel | ||
const uint32_t numElements) const { | ||
const uint32_t blockIdx(alpaka::idx::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]); | ||
if (blockIdx >= moduleStart[0]) | ||
return; | ||
|
||
auto firstPixel = moduleStart[1 + blockIdx]; | ||
auto thisModuleId = id[firstPixel]; | ||
assert(thisModuleId < MaxNumModules); | ||
assert(thisModuleId == moduleId[blockIdx]); | ||
|
||
auto nclus = nClustersInModule[thisModuleId]; | ||
if (nclus == 0) | ||
return; | ||
|
||
const uint32_t threadIdxLocal(alpaka::idx::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]); | ||
if (threadIdxLocal == 0 && nclus > MaxNumClustersPerModules) | ||
printf("Warning too many clusters in module %d in block %d: %d > %d\n", | ||
thisModuleId, | ||
blockIdx, | ||
nclus, | ||
MaxNumClustersPerModules); | ||
|
||
// Stride = block size. | ||
const uint32_t blockDimension(alpaka::workdiv::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]); | ||
|
||
// Get thread / CPU element indices in block. | ||
const auto& [firstElementIdxNoStride, endElementIdxNoStride] = | ||
cms::alpakatools::element_index_range_in_block(acc, Vec1::all(firstPixel)); | ||
|
||
if (nclus > MaxNumClustersPerModules) { | ||
uint32_t firstElementIdx = firstElementIdxNoStride[0u]; | ||
uint32_t endElementIdx = endElementIdxNoStride[0u]; | ||
// remove excess FIXME find a way to cut charge first.... | ||
for (uint32_t i = firstElementIdx; i < numElements; ++i) { | ||
if (!cms::alpakatools::get_next_element_1D_index_stride( | ||
i, firstElementIdx, endElementIdx, blockDimension, numElements)) | ||
break; | ||
if (id[i] == InvId) | ||
continue; // not valid | ||
if (id[i] != thisModuleId) | ||
break; // end of module | ||
if (clusterId[i] >= MaxNumClustersPerModules) { | ||
id[i] = InvId; | ||
clusterId[i] = InvId; | ||
} | ||
} | ||
nclus = MaxNumClustersPerModules; | ||
} | ||
|
||
#ifdef GPU_DEBUG | ||
if (thisModuleId % 100 == 1) | ||
if (threadIdxLocal == 0) | ||
printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx); | ||
#endif | ||
|
||
auto&& charge = alpaka::block::shared::st::allocVar<int32_t[MaxNumClustersPerModules], __COUNTER__>(acc); | ||
auto&& ok = alpaka::block::shared::st::allocVar<uint8_t[MaxNumClustersPerModules], __COUNTER__>(acc); | ||
auto&& newclusId = alpaka::block::shared::st::allocVar<uint16_t[MaxNumClustersPerModules], __COUNTER__>(acc); | ||
|
||
assert(nclus <= MaxNumClustersPerModules); | ||
cms::alpakatools::for_each_element_1D_block_stride(acc, nclus, [&](uint32_t i) { charge[i] = 0; }); | ||
alpaka::block::sync::syncBlockThreads(acc); | ||
|
||
uint32_t firstElementIdx = firstElementIdxNoStride[0u]; | ||
uint32_t endElementIdx = endElementIdxNoStride[0u]; | ||
for (uint32_t i = firstElementIdx; i < numElements; ++i) { | ||
if (!cms::alpakatools::get_next_element_1D_index_stride( | ||
i, firstElementIdx, endElementIdx, blockDimension, numElements)) | ||
break; | ||
if (id[i] == InvId) | ||
continue; // not valid | ||
if (id[i] != thisModuleId) | ||
break; // end of module | ||
alpaka::atomic::atomicOp<alpaka::atomic::op::Add>(acc, &charge[clusterId[i]], static_cast<int32_t>(adc[i])); | ||
} | ||
alpaka::block::sync::syncBlockThreads(acc); | ||
|
||
auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?) | ||
cms::alpakatools::for_each_element_1D_block_stride( | ||
acc, nclus, [&](uint32_t i) { newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0; }); | ||
alpaka::block::sync::syncBlockThreads(acc); | ||
|
||
// renumber | ||
auto&& ws = alpaka::block::shared::st::allocVar<uint16_t[32], __COUNTER__>(acc); | ||
cms::alpakatools::blockPrefixScan(acc, newclusId, nclus, ws); | ||
|
||
assert(nclus >= newclusId[nclus - 1]); | ||
|
||
if (nclus == newclusId[nclus - 1]) | ||
return; | ||
|
||
nClustersInModule[thisModuleId] = newclusId[nclus - 1]; | ||
alpaka::block::sync::syncBlockThreads(acc); | ||
|
||
// mark bad cluster again | ||
cms::alpakatools::for_each_element_1D_block_stride(acc, nclus, [&](uint32_t i) { | ||
if (0 == ok[i]) | ||
newclusId[i] = InvId + 1; | ||
}); | ||
alpaka::block::sync::syncBlockThreads(acc); | ||
|
||
// reassign id | ||
firstElementIdx = firstElementIdxNoStride[0u]; | ||
endElementIdx = endElementIdxNoStride[0u]; | ||
for (uint32_t i = firstElementIdx; i < numElements; ++i) { | ||
if (!cms::alpakatools::get_next_element_1D_index_stride( | ||
i, firstElementIdx, endElementIdx, blockDimension, numElements)) | ||
break; | ||
if (id[i] == InvId) | ||
continue; // not valid | ||
if (id[i] != thisModuleId) | ||
break; // end of module | ||
clusterId[i] = newclusId[clusterId[i]] - 1; | ||
if (clusterId[i] == InvId) | ||
id[i] = InvId; | ||
} | ||
|
||
//done | ||
} | ||
}; | ||
|
||
} // namespace gpuClustering | ||
|
||
#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h |
Oops, something went wrong.