Skip to content

Commit

Permalink
[atomicCAS] Exclude the host-to-device (H2D) copy from the timing measurement
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jin Z committed Apr 19, 2023
1 parent d4876a9 commit bd2f002
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
9 changes: 6 additions & 3 deletions atomicCAS-cuda/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@

template <typename T>
void testMin (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);
atomicMinDerived<T><<<NUM_BLOCKS, BLOCK_SIZE>>> (d_ptr);
}

Expand All @@ -37,10 +38,11 @@ void testMin (T *h_ptr, T *d_ptr, const int repeat, const char* name) {

template <typename T>
void testMax (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);
atomicMaxDerived<T><<<NUM_BLOCKS, BLOCK_SIZE>>> (d_ptr);
}

Expand All @@ -55,10 +57,11 @@ void testMax (T *h_ptr, T *d_ptr, const int repeat, const char* name) {

template <typename T>
void testAdd (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
cudaMemcpy(d_ptr, h_ptr, sizeof(T), cudaMemcpyHostToDevice);
atomicAddDerived<T><<<NUM_BLOCKS, BLOCK_SIZE>>> (d_ptr);
}

Expand Down
9 changes: 6 additions & 3 deletions atomicCAS-hip/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@

template <typename T>
void testMin (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);
hipLaunchKernelGGL(HIP_KERNEL_NAME(atomicMinDerived<T>), NUM_BLOCKS, BLOCK_SIZE, 0, 0, d_ptr);
}

Expand All @@ -37,10 +38,11 @@ void testMin (T *h_ptr, T *d_ptr, const int repeat, const char* name) {

template <typename T>
void testMax (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);
hipLaunchKernelGGL(HIP_KERNEL_NAME(atomicMaxDerived<T>), NUM_BLOCKS, BLOCK_SIZE, 0, 0, d_ptr);
}

Expand All @@ -55,10 +57,11 @@ void testMax (T *h_ptr, T *d_ptr, const int repeat, const char* name) {

template <typename T>
void testAdd (T *h_ptr, T *d_ptr, const int repeat, const char* name) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);

auto start = std::chrono::steady_clock::now();

for (int n = 0; n < repeat; n++) {
hipMemcpy(d_ptr, h_ptr, sizeof(T), hipMemcpyHostToDevice);
hipLaunchKernelGGL(HIP_KERNEL_NAME(atomicAddDerived<T>), NUM_BLOCKS, BLOCK_SIZE, 0, 0, d_ptr);
}

Expand Down

0 comments on commit bd2f002

Please sign in to comment.