From 0489c582c396d6dc34c5a772c2fb08030ebf50b7 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 1 Dec 2020 02:22:18 +0100 Subject: [PATCH] Simplify cudacompat layer to use a 1-dimensional grid (cms-patatrack#586) Remove the possibility of changing the grid size used by the cms::cudacompat layer, and make it a constant equal to {1, 1, 1}. This avoids a thread-related problem caused by TBB using worker threads where the grid size had not been initialised. The kernel for pixel clustering need to be rewritten to support a one-dimensional grid to run on the CPU. Currently they are only used on the GPU in the Patatrack workflows, but they are exercised on the CPU by the gpuClustering_t tests; those tests have been commented out until the kernels can be updated. --- .../SiPixelClusterizer/test/gpuClustering_t.h | 17 +++++++++-------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 2 -- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 8ec665f8960b6..721f08adfcf46 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -10,11 +10,10 @@ #include #ifdef __CUDACC__ - -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -33,7 +32,6 @@ int main(void) { auto h_x = std::make_unique(numElements); auto h_y = std::make_unique(numElements); auto h_adc = std::make_unique(numElements); - auto h_clus = std::make_unique(numElements); #ifdef __CUDACC__ @@ -46,11 +44,9 @@ int main(void) { auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); #else - auto h_moduleStart = std::make_unique(MaxNumModules + 1); auto h_clusInModule = std::make_unique(MaxNumModules); auto h_moduleId = std::make_unique(MaxNumModules); - #endif // later random number @@ -301,9 +297,12 @@ int main(void) { cudaDeviceSynchronize(); #else + h_moduleStart[0] = nModules; countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t)); +#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE + // FIXME the findClus kernel should be rewritten to avoid relying on a predefined grid size gridDim.x = MaxNumModules; //not needed in the kernel for this specific case; assert(blockIdx.x == 0); for (; blockIdx.x < gridDim.x; ++blockIdx.x) @@ -315,7 +314,7 @@ int main(void) { h_moduleId.get(), h_clus.get(), n); - resetGrid(); +#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE nModules = h_moduleStart[0]; auto nclus = h_clusInModule.get(); @@ -330,12 +329,14 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; +#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE + // FIXME the clusterChargeCut kernel should be rewritten to avoid relying on a predefined grid size gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case assert(blockIdx.x == 0); for (; blockIdx.x < gridDim.x; ++blockIdx.x) clusterChargeCut( h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); - resetGrid(); +#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE #endif diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index c7084f325d05b..9505aec154222 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -209,8 +209,6 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // filled creates view SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()}; assert(digiView.adc(0) != 0); - // not needed... - cms::cudacompat::resetGrid(); // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); for (auto h = fc; h < lc; ++h)