Skip to content

Commit

Permalink
Rename exitSansCUDADevices to requireCUDADevices (#423)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmsbuild authored and fwyzard committed Jan 13, 2021
1 parent 6793479 commit ce182b4
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 46 deletions.
6 changes: 3 additions & 3 deletions CUDADataFormats/Track/src/classes.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormats__src_classes_h
#define CUDADataFormats__src_classes_h
#ifndef CUDADataFormats_Track_src_classes_h
#define CUDADataFormats_Track__src_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/Common/interface/HostProduct.h"
#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
#include "CUDADataFormats/Common/interface/ArrayShadow.h"
Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/Track/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<lcgdict>
<class name="CUDAProduct<HeterogeneousSoA<pixelTrack::TrackSoA>>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<HeterogeneousSoA<pixelTrack::TrackSoA>>>" persistent="false"/>
<class name="cms::cuda::Product<HeterogeneousSoA<pixelTrack::TrackSoA>>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<HeterogeneousSoA<pixelTrack::TrackSoA>>>" persistent="false"/>
<class name="HeterogeneousSoA<pixelTrack::TrackSoA>" persistent="false"/>
<class name="edm::Wrapper<HeterogeneousSoA<pixelTrack::TrackSoA>>" persistent="false"/>
<class name="ArrayShadow<std::array<unsigned int,2001>>" persistent="false"/>
Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ __global__ void testTSSoA(TS* pts, int n) {
}

#ifdef __CUDACC__
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#endif

int main() {
#ifdef __CUDACC__
exitSansCUDADevices();
cms::cudatest::requireDevices();
#endif

TS ts;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <cuda_runtime.h>

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/Common/interface/HostProduct.h"
#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
#include "DataFormats/Common/interface/Handle.h"
Expand All @@ -15,7 +15,7 @@
#include "FWCore/PluginManager/interface/ModuleDef.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"

class PixelTrackSoAFromCUDA : public edm::stream::EDProducer<edm::ExternalWork> {
public:
Expand All @@ -30,14 +30,14 @@ class PixelTrackSoAFromCUDA : public edm::stream::EDProducer<edm::ExternalWork>
edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override;

edm::EDGetTokenT<CUDAProduct<PixelTrackHeterogeneous>> tokenCUDA_;
edm::EDGetTokenT<cms::cuda::Product<PixelTrackHeterogeneous>> tokenCUDA_;
edm::EDPutTokenT<PixelTrackHeterogeneous> tokenSOA_;

cudautils::host::unique_ptr<pixelTrack::TrackSoA> m_soa;
cms::cuda::host::unique_ptr<pixelTrack::TrackSoA> m_soa;
};

PixelTrackSoAFromCUDA::PixelTrackSoAFromCUDA(const edm::ParameterSet& iConfig)
: tokenCUDA_(consumes<CUDAProduct<PixelTrackHeterogeneous>>(iConfig.getParameter<edm::InputTag>("src"))),
: tokenCUDA_(consumes<cms::cuda::Product<PixelTrackHeterogeneous>>(iConfig.getParameter<edm::InputTag>("src"))),
tokenSOA_(produces<PixelTrackHeterogeneous>()) {}

void PixelTrackSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
Expand All @@ -50,8 +50,8 @@ void PixelTrackSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& des
void PixelTrackSoAFromCUDA::acquire(edm::Event const& iEvent,
edm::EventSetup const& iSetup,
edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
CUDAProduct<PixelTrackHeterogeneous> const& inputDataWrapped = iEvent.get(tokenCUDA_);
CUDAScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)};
cms::cuda::Product<PixelTrackHeterogeneous> const& inputDataWrapped = iEvent.get(tokenCUDA_);
cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)};
auto const& inputData = ctx.get(inputDataWrapped);

m_soa = inputData.toHostAsync(ctx.stream());
Expand Down
4 changes: 2 additions & 2 deletions RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <Eigen/Eigenvalues>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"

#ifdef USE_BL
#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h"
Expand Down Expand Up @@ -329,7 +329,7 @@ void testFit() {
}

int main(int argc, char* argv[]) {
exitSansCUDADevices();
cms::cudatest::requireDevices();

testFit<4>();
testFit<3>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <Eigen/Eigenvalues>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
#include "test_common.h"

using namespace Eigen;
Expand Down Expand Up @@ -215,7 +215,7 @@ void testEigenvalues() {
}

int main(int argc, char *argv[]) {
exitSansCUDADevices();
cms::cudatest::requireDevices();

testEigenvalues();
testInverse3x3();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv,
auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;

// Fit internals
auto hitsGPU_ = cudautils::make_device_unique<double[]>(
auto hitsGPU_ = cms::cuda::make_device_unique<double[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double), stream);
auto hits_geGPU_ = cudautils::make_device_unique<float[]>(
auto hits_geGPU_ = cms::cuda::make_device_unique<float[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float), stream);
auto fast_fit_resultsGPU_ = cudautils::make_device_unique<double[]>(
auto fast_fit_resultsGPU_ = cms::cuda::make_device_unique<double[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double), stream);

for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
Expand Down
17 changes: 9 additions & 8 deletions RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <cuda_runtime.h>

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "DataFormats/Common/interface/Handle.h"
#include "FWCore/Framework/interface/ESHandle.h"
#include "FWCore/Framework/interface/Event.h"
Expand All @@ -15,7 +15,7 @@
#include "FWCore/PluginManager/interface/ModuleDef.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/RunningAverage.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h"

#include "CAHitNtupletGeneratorOnGPU.h"
Expand All @@ -34,8 +34,8 @@ class CAHitNtupletCUDA : public edm::global::EDProducer<> {

bool m_OnGPU;

edm::EDGetTokenT<CUDAProduct<TrackingRecHit2DGPU>> tokenHitGPU_;
edm::EDPutTokenT<CUDAProduct<PixelTrackHeterogeneous>> tokenTrackGPU_;
edm::EDGetTokenT<cms::cuda::Product<TrackingRecHit2DGPU>> tokenHitGPU_;
edm::EDPutTokenT<cms::cuda::Product<PixelTrackHeterogeneous>> tokenTrackGPU_;
edm::EDGetTokenT<TrackingRecHit2DCPU> tokenHitCPU_;
edm::EDPutTokenT<PixelTrackHeterogeneous> tokenTrackCPU_;

Expand All @@ -45,8 +45,9 @@ class CAHitNtupletCUDA : public edm::global::EDProducer<> {
CAHitNtupletCUDA::CAHitNtupletCUDA(const edm::ParameterSet& iConfig)
: m_OnGPU(iConfig.getParameter<bool>("onGPU")), gpuAlgo_(iConfig, consumesCollector()) {
if (m_OnGPU) {
tokenHitGPU_ = consumes<CUDAProduct<TrackingRecHit2DGPU>>(iConfig.getParameter<edm::InputTag>("pixelRecHitSrc"));
tokenTrackGPU_ = produces<CUDAProduct<PixelTrackHeterogeneous>>();
tokenHitGPU_ =
consumes<cms::cuda::Product<TrackingRecHit2DGPU>>(iConfig.getParameter<edm::InputTag>("pixelRecHitSrc"));
tokenTrackGPU_ = produces<cms::cuda::Product<PixelTrackHeterogeneous>>();
} else {
tokenHitCPU_ = consumes<TrackingRecHit2DCPU>(iConfig.getParameter<edm::InputTag>("pixelRecHitSrc"));
tokenTrackCPU_ = produces<PixelTrackHeterogeneous>();
Expand All @@ -68,10 +69,10 @@ void CAHitNtupletCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const
auto bf = 1. / PixelRecoUtilities::fieldInInvGev(es);

if (m_OnGPU) {
edm::Handle<CUDAProduct<TrackingRecHit2DCUDA>> hHits;
edm::Handle<cms::cuda::Product<TrackingRecHit2DCUDA>> hHits;
iEvent.getByToken(tokenHitGPU_, hHits);

CUDAScopedContextProduce ctx{*hHits};
cms::cuda::ScopedContextProduce ctx{*hHits};
auto const& hits = ctx.get(*hHits);

ctx.emplace(iEvent, tokenTrackGPU_, gpuAlgo_.makeTuplesAsync(hits, bf, ctx.stream()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *
assert(tuples_d && quality_d);

// zero tuples
cudautils::launchZero(tuples_d, cudaStream);
cms::cuda::launchZero(tuples_d, cudaStream);

auto nhits = hh.nHits();
assert(nhits <= pixelGPUConstants::maxNumberOfHits);
Expand Down Expand Up @@ -108,13 +108,13 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *
if (m_params.doStats_)
kernel_mark_used(hh.view(), device_theCells_.get(), device_nCells_);

cudautils::finalizeBulk(device_hitTuple_apc_, tuples_d);
cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d);

// remove duplicates (tracks that share a doublet)
kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, quality_d);

kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());
cudautils::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream);
cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream);
kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get());

if (nhits > 1 && m_params.lateFishbone_) {
Expand Down Expand Up @@ -154,7 +154,7 @@ void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA

// fill hit->track "map"
kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());
cudautils::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream);
cms::cuda::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream);
kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get());

// remove duplicates (tracks that share a hit)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA *
auto *quality_d = (Quality *)(&tracks_d->m_quality);

// zero tuples
cudautils::launchZero(tuples_d, cudaStream);
cms::cuda::launchZero(tuples_d, cudaStream);

auto nhits = hh.nHits();
assert(nhits <= pixelGPUConstants::maxNumberOfHits);
Expand Down Expand Up @@ -96,7 +96,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA *

blockSize = 128;
numberOfBlocks = (HitContainer::totbins() + blockSize - 1) / blockSize;
cudautils::finalizeBulk<<<numberOfBlocks, blockSize, 0, cudaStream>>>(device_hitTuple_apc_, tuples_d);
cms::cuda::finalizeBulk<<<numberOfBlocks, blockSize, 0, cudaStream>>>(device_hitTuple_apc_, tuples_d);

// remove duplicates (tracks that share a doublet)
numberOfBlocks = (3 * m_params.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize;
Expand All @@ -108,7 +108,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA *
numberOfBlocks = (3 * CAConstants::maxTuples() / 4 + blockSize - 1) / blockSize;
kernel_countMultiplicity<<<numberOfBlocks, blockSize, 0, cudaStream>>>(
tuples_d, quality_d, device_tupleMultiplicity_.get());
cudautils::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream);
cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream);
kernel_fillMultiplicity<<<numberOfBlocks, blockSize, 0, cudaStream>>>(
tuples_d, quality_d, device_tupleMultiplicity_.get());
cudaCheck(cudaGetLastError());
Expand Down Expand Up @@ -160,7 +160,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr
#endif

// in principle we can use "nhits" to heuristically dimension the workspace...
device_isOuterHitOfCell_ = cudautils::make_device_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
device_isOuterHitOfCell_ = cms::cuda::make_device_unique<GPUCACell::OuterHitOfCell[]>(std::max(1U, nhits), stream);
assert(device_isOuterHitOfCell_.get());
{
int threadsPerBlock = 128;
Expand All @@ -175,7 +175,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr
cudaCheck(cudaGetLastError());
}

device_theCells_ = cudautils::make_device_unique<GPUCACell[]>(m_params.maxNumberOfDoublets_, stream);
device_theCells_ = cms::cuda::make_device_unique<GPUCACell[]>(m_params.maxNumberOfDoublets_, stream);

#ifdef GPU_DEBUG
cudaDeviceSynchronize();
Expand Down Expand Up @@ -252,7 +252,7 @@ void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA
kernel_countHitInTracks<<<numberOfBlocks, blockSize, 0, cudaStream>>>(
tuples_d, quality_d, device_hitToTuple_.get());
cudaCheck(cudaGetLastError());
cudautils::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream);
cms::cuda::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream);
cudaCheck(cudaGetLastError());
kernel_fillHitInTracks<<<numberOfBlocks, blockSize, 0, cudaStream>>>(tuples_d, quality_d, device_hitToTuple_.get());
cudaCheck(cudaGetLastError());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
} else {
*device_nCells_ = 0;
}
cudautils::launchZero(device_tupleMultiplicity_.get(), stream);
cudautils::launchZero(device_hitToTuple_.get(), stream); // we may wish to keep it in the edm...
cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream);
cms::cuda::launchZero(device_hitToTuple_.get(), stream); // we may wish to keep it in the edm...
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription&
PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DCUDA const& hits_d,
float bfield,
cudaStream_t stream) const {
PixelTrackHeterogeneous tracks(cudautils::make_device_unique<pixelTrack::TrackSoA>(stream));
PixelTrackHeterogeneous tracks(cms::cuda::make_device_unique<pixelTrack::TrackSoA>(stream));

auto* soa = tracks.get();

Expand Down
8 changes: 4 additions & 4 deletions RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ void HelixFitOnGPU::launchRiemannKernels(HitsView const *hv,
auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;

// Fit internals
auto hitsGPU_ = cudautils::make_device_unique<double[]>(
auto hitsGPU_ = cms::cuda::make_device_unique<double[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double), stream);
auto hits_geGPU_ = cudautils::make_device_unique<float[]>(
auto hits_geGPU_ = cms::cuda::make_device_unique<float[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float), stream);
auto fast_fit_resultsGPU_ = cudautils::make_device_unique<double[]>(
auto fast_fit_resultsGPU_ = cms::cuda::make_device_unique<double[]>(
maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double), stream);
auto circle_fit_resultsGPU_holder =
cudautils::make_device_unique<char[]>(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit), stream);
cms::cuda::make_device_unique<char[]>(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit), stream);
Rfit::circle_fit *circle_fit_resultsGPU_ = (Rfit::circle_fit *)(circle_fit_resultsGPU_holder.get());

for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
Expand Down

0 comments on commit ce182b4

Please sign in to comment.