forked from cms-sw/cmssw
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create CUDA events for CUDAProduct only when needed (#292)
Do not create event if the CUDA stream is idle, i.e. has already finished all work that was queued, at the point when data products are wrapped/emplaced for/to edm::Event. When creating an event, create only a single event per producer, i.e. all products of a producer share the same event. Include a unit test checking the assumed behaviour of CUDA events and streams.
- Loading branch information
Showing
11 changed files
with
199 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
<bin file="test*.cc test*.cu" name="testHeterogeneousCoreCUDACore"> | ||
<bin file="test_*.cc test_*.cu" name="testHeterogeneousCoreCUDACore"> | ||
<use name="FWCore/TestProcessor"/> | ||
<use name="HeterogeneousCore/CUDACore"/> | ||
<use name="catch2"/> | ||
<use name="cuda"/> | ||
</bin> | ||
<bin file="testStreamEvent.cu" name="testHeterogeneousCoreCUDACoreStreamEvent"> | ||
<use name="HeterogeneousCore/CUDAUtilities"/> | ||
<use name="cuda"/> | ||
</bin> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
/** | ||
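* The purpose of this test program is to ensure that the assumptions about | ||
* the behaviour of CUDA events and streams, on which the logic for CUDA event | ||
* use in CUDAProduct and CUDAScopedContext relies, actually hold. | ||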
*/ | ||
|
||
#include <iostream> | ||
#include <memory> | ||
#include <type_traits> | ||
#include <chrono> | ||
#include <thread> | ||
#include <cassert> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" | ||
|
||
namespace {
  // Number of float elements in the host and device test buffers.
  constexpr int ARRAY_SIZE = 20 * 1000 * 1000;
  // Number of passes kernel_looping makes over the buffer.
  constexpr int NLOOPS = 10;
}  // namespace
|
||
/**
 * Adds 1 to each of the first `num` elements of `point`, repeating the whole
 * pass NLOOPS times.
 *
 * Each thread starts at its global x-index and advances by the total number
 * of threads launched along x, so the loop bound (not the launch
 * configuration) decides which elements are touched.
 */
__global__ void kernel_looping(float* point, unsigned int num) {
  const unsigned int first = blockIdx.x * blockDim.x + threadIdx.x;
  const unsigned int step = gridDim.x * blockDim.x;

  int pass = 0;
  while (pass < NLOOPS) {
    for (size_t i = first; i < num; i += step) {
      point[i] = point[i] + 1;
    }
    ++pass;
  }
}
|
||
// Evaluate a CUDA runtime call inside main(). On failure, print the location
// and the CUDA error string to stderr and make main() return a non-zero exit
// code, so that a failing setup call is reported as such instead of being
// silently ignored.
#define CUDA_CHECK(call)                                                   \
  do {                                                                     \
    cudaError_t cudaCheckStatus_ = (call);                                 \
    if (cudaCheckStatus_ != cudaSuccess) {                                 \
      std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__ << ": " \
                << cudaGetErrorString(cudaCheckStatus_) << std::endl;      \
      return 1;                                                            \
    }                                                                      \
  } while (0)

/**
 * Checks the event/stream behaviour that the test header comment refers to.
 *
 * Sequence:
 *  1. Queue a host-to-device copy and kernel_looping on stream1, record event1
 *     on stream1 and assert that event1 has not occurred yet.
 *  2. Assert that stream2, which has no work queued, reports cudaSuccess.
 *  3. Make stream2 wait for event1, then assert that stream2 reports
 *     cudaErrorNotReady and that event2, recorded on stream2 afterwards, has
 *     not occurred yet.
 *  4. Assert that stream1 is still busy, synchronize it, and assert that both
 *     event1 and event2 have occurred.
 *
 * The queries (cudaStreamQuery / cudaEventQuery) are intentionally not wrapped
 * in CUDA_CHECK because cudaErrorNotReady is an expected result for them.
 *
 * NOTE: the behavioural checks use assert(), so they are compiled out if the
 * file is built with NDEBUG defined.
 *
 * Returns 0 on success and 1 if a CUDA runtime call fails.
 */
int main() {
  exitSansCUDADevices();

  // Set to true to print the observed stream/event states while running.
  constexpr bool debug = false;

  float *dev_points1 = nullptr;
  float *host_points1 = nullptr;
  cudaStream_t stream1, stream2;
  cudaEvent_t event1, event2;

  CUDA_CHECK(cudaMalloc(&dev_points1, ARRAY_SIZE * sizeof(float)));
  CUDA_CHECK(cudaMallocHost(&host_points1, ARRAY_SIZE * sizeof(float)));
  CUDA_CHECK(cudaStreamCreateWithFlags(&stream1, cudaStreamNonBlocking));
  CUDA_CHECK(cudaStreamCreateWithFlags(&stream2, cudaStreamNonBlocking));
  CUDA_CHECK(cudaEventCreate(&event1));
  CUDA_CHECK(cudaEventCreate(&event2));

  for (size_t j = 0; j < ARRAY_SIZE; ++j) {
    host_points1[j] = static_cast<float>(j);
  }

  // Queue work on stream1; the checks below expect it to still be running
  // when they execute.
  CUDA_CHECK(cudaMemcpyAsync(dev_points1, host_points1,
                             ARRAY_SIZE * sizeof(float),
                             cudaMemcpyHostToDevice, stream1));
  kernel_looping<<<1, 16, 0, stream1>>>(dev_points1, ARRAY_SIZE);
  // A kernel launch returns no status itself; launch errors are reported here.
  CUDA_CHECK(cudaGetLastError());
  if(debug) std::cout << "Kernel launched on stream1" << std::endl;

  auto status = cudaStreamQuery(stream1);
  if(debug) std::cout << "Stream1 busy? " << (status == cudaErrorNotReady) << " idle? " << (status == cudaSuccess) << std::endl;
  CUDA_CHECK(cudaEventRecord(event1, stream1));
  status = cudaEventQuery(event1);
  if (debug) std::cout << "Event1 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaErrorNotReady);

  // stream2 has had no work queued so far, so it must report itself as idle.
  status = cudaStreamQuery(stream2);
  if(debug) std::cout << "Stream2 busy? " << (status == cudaErrorNotReady) << " idle? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaSuccess);
  if(debug) {
    CUDA_CHECK(cudaEventRecord(event2, stream2));
    status = cudaEventQuery(event2);
    std::cout << "Event2 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
    status = cudaEventQuery(event2);
    std::cout << "Event2 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  }

  // Make stream2 depend on event1; it must then report not-ready, and so must
  // an event recorded on it afterwards.
  CUDA_CHECK(cudaStreamWaitEvent(stream2, event1, 0));
  if(debug) std::cout << "\nStream2 waiting for event1" << std::endl;
  status = cudaStreamQuery(stream2);
  if(debug) std::cout << "Stream2 busy? " << (status == cudaErrorNotReady) << " idle? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaErrorNotReady);
  CUDA_CHECK(cudaEventRecord(event2, stream2));
  status = cudaEventQuery(event2);
  if(debug) std::cout << "Event2 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaErrorNotReady);
  if(debug) {
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
    status = cudaEventQuery(event2);
    std::cout << "Event2 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  }

  status = cudaStreamQuery(stream1);
  if(debug) {
    std::cout << "\nStream1 busy? " << (status == cudaErrorNotReady) << " idle? " << (status == cudaSuccess) << std::endl;
    std::cout << "Synchronizing stream1" << std::endl;
  }
  assert(status == cudaErrorNotReady);
  CUDA_CHECK(cudaStreamSynchronize(stream1));
  if(debug) std::cout << "Synchronized stream1" << std::endl;

  // Once stream1 has finished, both events are expected to have occurred.
  status = cudaEventQuery(event1);
  if(debug) std::cout << "Event1 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaSuccess);
  status = cudaEventQuery(event2);
  if(debug) std::cout << "Event2 recorded? " << (status == cudaErrorNotReady) << " occurred? " << (status == cudaSuccess) << std::endl;
  assert(status == cudaSuccess);

  CUDA_CHECK(cudaFree(dev_points1));
  CUDA_CHECK(cudaFreeHost(host_points1));
  CUDA_CHECK(cudaStreamDestroy(stream1));
  CUDA_CHECK(cudaStreamDestroy(stream2));
  CUDA_CHECK(cudaEventDestroy(event1));
  CUDA_CHECK(cudaEventDestroy(event2));

  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.