Skip to content

Commit

Permalink
Guarantee that cache returns only occurred events
Browse files Browse the repository at this point in the history
  • Loading branch information
makortel committed Dec 17, 2019
1 parent 0a5c1ab commit e501505
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
9 changes: 8 additions & 1 deletion HeterogeneousCore/CUDAUtilities/interface/CUDAEventCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@ namespace cudautils {
CUDAEventCache();

// Gets a (cached) CUDA event for the current device. The event
// will be returned to the cache by the shared_ptr destructor. The
// returned event is guaranteed to be "occurred", i.e.
// cudaEventQuery() == cudaSuccess.
//
// If cudaEventQuery() reports anything other than cudaSuccess or
// cudaErrorNotReady, the return code is passed to cudaCheck().
//
// This function is thread safe
SharedEventPtr getCUDAEvent();

private:
friend class ::CUDAService;

// Obtains an event for device `dev` through cache_[dev]. The factory
// handed to the cache creates events with cudaEventDisableTiming.
// The returned event may be recorded but not yet occurred;
// getCUDAEvent() is the caller that filters those out.
// NOTE(review): presumably the factory runs only when no cached event
// is available -- confirm against the cache implementation.
//
// thread safe
SharedEventPtr makeOrGet(int dev);

// not thread safe, intended to be called only from CUDAService destructor
// (CUDAService is declared a friend of this class, which is what lets it
// call this private function)
void clear();

Expand Down
30 changes: 29 additions & 1 deletion HeterogeneousCore/CUDAUtilities/src/CUDAEventCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,36 @@ namespace cudautils {

SharedEventPtr CUDAEventCache::getCUDAEvent() {
  const auto device = cudautils::currentDevice();

  // Fast path: the first event handed out by the cache has already
  // occurred, so it can be given to the caller as-is.
  auto candidate = makeOrGet(device);
  auto status = cudaEventQuery(candidate.get());
  if (status == cudaSuccess) {
    return candidate;
  }
  // Anything other than "recorded but not yet occurred" is an error
  // and is handed to cudaCheck().
  if (status != cudaErrorNotReady) {
    cudaCheck(status);
  }

  // Slow path: keep asking the cache until it hands out an occurred
  // event. Every not-yet-occurred event is parked here for the
  // duration of the search; releasing one earlier would put it back
  // in the cache, and the next request could return the very same
  // event again (ping-pong).
  std::vector<SharedEventPtr> parked;
  parked.emplace_back(std::move(candidate));
  for (;;) {
    candidate = makeOrGet(device);
    status = cudaEventQuery(candidate.get());
    if (status == cudaSuccess) {
      break;
    }
    if (status == cudaErrorNotReady) {
      parked.emplace_back(std::move(candidate));
    } else {
      cudaCheck(status);
    }
  }
  // The parked events go back to the cache when `parked` is destroyed
  // on return.
  return candidate;
}

SharedEventPtr CUDAEventCache::makeOrGet(int dev) {
return cache_[dev].makeOrGet([dev]() {
// TODO(?): We should not return a recorded, but not-yet-occurred event
cudaEvent_t event;
// it should be a bit faster to ignore timings
cudaCheck(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
Expand Down

0 comments on commit e501505

Please sign in to comment.