Skip to content

Commit

Permalink
Merge pull request cms-sw#44458 from fwyzard/improve_alpaka_Pixel_memory_usage_141x
Browse files Browse the repository at this point in the history

Improve the memory usage in the alpaka pixel reconstruction
  • Loading branch information
cmsbuild authored Mar 21, 2024
2 parents 86d37c3 + 70494ae commit 7911d8c
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@ class TrackingRecHitDevice : public PortableDeviceCollection<TrackingRecHitLayou
: PortableDeviceCollection<TrackingRecHitLayout<TrackerTraits>, TDev>(nHits, queue), offsetBPIX2_{offsetBPIX2} {
const auto device = alpaka::getDev(queue);

auto start_h = cms::alpakatools::make_host_view(hitsModuleStart, TrackerTraits::numberOfModules + 1);
auto start_h = cms::alpakatools::make_device_view(device, hitsModuleStart, TrackerTraits::numberOfModules + 1);
auto start_d =
cms::alpakatools::make_device_view(device, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1);
alpaka::memcpy(queue, start_d, start_h);

auto off_h = cms::alpakatools::make_host_view(offsetBPIX2);
auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_);
auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2());
alpaka::memcpy(queue, off_d, off_h);
alpaka::wait(queue);
}

// Number of hits, taken from the size reported by the view's metadata.
uint32_t nHits() const {
  auto const hitCount = view().metadata().size();
  return hitCount;
}
Expand Down
11 changes: 7 additions & 4 deletions DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@ int main() {
{
uint32_t nHits = 2000;
int32_t offset = 100;
uint32_t moduleStart[pixelTopology::Phase1::numberOfModules + 1];

auto moduleStartH =
cms::alpakatools::make_host_buffer<uint32_t[]>(queue, pixelTopology::Phase1::numberOfModules + 1);
for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; ++i) {
moduleStart[i] = i * 2;
moduleStartH[i] = i * 2;
}
TrackingRecHitsSoACollection<pixelTopology::Phase1> tkhit(queue, nHits, offset, moduleStart);
auto moduleStartD =
cms::alpakatools::make_device_buffer<uint32_t[]>(queue, pixelTopology::Phase1::numberOfModules + 1);
alpaka::memcpy(queue, moduleStartD, moduleStartH);
TrackingRecHitsSoACollection<pixelTopology::Phase1> tkhit(queue, nHits, offset, moduleStartD.data());

testTrackingRecHitSoA::runKernels<pixelTopology::Phase1>(tkhit.view(), queue);
tkhit.updateFromDevice(queue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ namespace cms::alpakatools {
nOnes,
nblocks,
ppsws,
alpaka::getWarpSizes(alpaka::getDev(queue))[0]);
alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
} else {
h->finalize();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ int main() {
for (auto const& device : devices) {
std::cout << "Test prefix scan on " << alpaka::getName(device) << '\n';
auto queue = Queue(device);
const auto warpSize = alpaka::getWarpSizes(device)[0];
const auto warpSize = alpaka::getPreferredWarpSize(device);
// WARP PREFIXSCAN (OBVIOUSLY GPU-ONLY)
if constexpr (!requires_single_thread_per_block_v<Acc1D>) {
std::cout << "warp level" << std::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

template <typename TrackerTraits>
void SiPixelRawToCluster<TrackerTraits>::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) {
[[maybe_unused]] auto const& hMap = iSetup.getData(mapToken_);
auto const& hMap = iSetup.getData(mapToken_);
auto const& dGains = iSetup.getData(gainsToken_);
auto gains = SiPixelGainCalibrationForHLTDevice(1, iEvent.queue());
auto modulesToUnpackRegional =
cms::alpakatools::make_device_buffer<unsigned char[]>(iEvent.queue(), ::pixelgpudetails::MAX_SIZE);
const unsigned char* modulesToUnpack;

// initialize cabling map or update if necessary
if (recordWatcher_.check(iSetup)) {
// cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel)
Expand All @@ -151,6 +148,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
cabling_ = cablingMap_->cablingTree();
LogDebug("map version:") << cablingMap_->version();
}

// if used, the buffer is guaranteed to stay alive until after the execution of makePhase1ClustersAsync completes
std::optional<cms::alpakatools::device_buffer<Device, unsigned char[]>> modulesToUnpackRegional;
const unsigned char* modulesToUnpack;
if (regions_) {
regions_->run(iEvent, iSetup);
LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs();
Expand All @@ -159,7 +160,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

modulesToUnpackRegional = SiPixelMappingUtilities::getModToUnpRegionalAsync(
*(regions_->modulesToUnpack()), cabling_.get(), fedIds_, iEvent.queue());
modulesToUnpack = modulesToUnpackRegional.data();
modulesToUnpack = modulesToUnpackRegional->data();
} else {
modulesToUnpack = hMap->modToUnpDefault();
}
Expand Down Expand Up @@ -235,7 +236,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
return;

// copy the FED data to a single cpu buffer
pixelDetails::WordFedAppender wordFedAppender(nDigis_);
pixelDetails::WordFedAppender wordFedAppender(iEvent.queue(), nDigis_);
for (uint32_t i = 0; i < fedIds_.size(); ++i) {
wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

class WordFedAppender {
public:
WordFedAppender();
~WordFedAppender() = default;

WordFedAppender(uint32_t words)
: word_{cms::alpakatools::make_host_buffer<unsigned int[], Platform>(words)},
fedId_{cms::alpakatools::make_host_buffer<unsigned char[], Platform>(words)} {};
WordFedAppender(Queue& queue, uint32_t words)
: word_{cms::alpakatools::make_host_buffer<unsigned int[]>(queue, words)},
fedId_{cms::alpakatools::make_host_buffer<unsigned char[]>(queue, words)} {};

void initializeWordFed(int fedId, unsigned int wordCounterGPU, const uint32_t* src, unsigned int length) {
std::memcpy(word_.data() + wordCounterGPU, src, sizeof(uint32_t) * length);
Expand Down

0 comments on commit 7911d8c

Please sign in to comment.