From 6727f2a2b8f07c0e3d4006869ca3c23e96af22b4 Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Wed, 12 Aug 2020 18:10:19 -0700
Subject: [PATCH] reduce cudaDeviceSynchronize calls

Remove the friend overload synchronize_stream(execute_on_stream_base&)
from par.h and make the generic synchronize_stream(execution_policy&)
in util.h do the same work for every policy:

  * host code:   cudaStreamSynchronize(stream(policy)) followed by
                 cudaGetLastError(), instead of cudaDeviceSynchronize().
  * device code: unchanged -- cudaDeviceSynchronize() followed by
                 cudaGetLastError() when __THRUST_HAS_CUDART__ is set,
                 cudaSuccess otherwise.

`result` is initialized to cudaSuccess so that the function never returns
an indeterminate value if a preprocessor-guarded branch is compiled out.
---
NOTE(review): this copy of the patch was rebuilt from a source in which
line breaks, indentation and template argument lists had been lost.
Indentation and the `<class Derived>` / `<Derived>` /
`<execute_on_stream>` arguments were restored by hand; the author e-mail
could not be recovered. If context lines fail to match, try
`git apply --ignore-whitespace` and confirm against the upstream commit.

 thrust/system/cuda/detail/par.h  | 25 -------------------------
 thrust/system/cuda/detail/util.h | 27 ++++++++++++++++++++-------
 2 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/thrust/system/cuda/detail/par.h b/thrust/system/cuda/detail/par.h
index 1e3be070f..d232a6cfa 100644
--- a/thrust/system/cuda/detail/par.h
+++ b/thrust/system/cuda/detail/par.h
@@ -69,31 +69,6 @@ struct execute_on_stream_base : execution_policy<Derived>
   {
     return exec.stream;
   }
-
-  friend __host__ __device__
-  cudaError_t
-  synchronize_stream(execute_on_stream_base &exec)
-  {
-    cudaError_t result;
-    if (THRUST_IS_HOST_CODE) {
-      #if THRUST_INCLUDE_HOST_CODE
-        cudaStreamSynchronize(exec.stream);
-        result = cudaGetLastError();
-      #endif
-    } else {
-      #if THRUST_INCLUDE_DEVICE_CODE
-        #if __THRUST_HAS_CUDART__
-          THRUST_UNUSED_VAR(exec);
-          cudaDeviceSynchronize();
-          result = cudaGetLastError();
-        #else
-          THRUST_UNUSED_VAR(exec);
-          result = cudaSuccess;
-        #endif
-      #endif
-    }
-    return result;
-  }
 };
 
 struct execute_on_stream : execute_on_stream_base<execute_on_stream>
diff --git a/thrust/system/cuda/detail/util.h b/thrust/system/cuda/detail/util.h
index b2c9839d1..07ee7d9a1 100644
--- a/thrust/system/cuda/detail/util.h
+++ b/thrust/system/cuda/detail/util.h
@@ -72,14 +72,27 @@ __thrust_exec_check_disable__
 template <class Derived>
 __host__ __device__
 cudaError_t
-synchronize_stream(execution_policy<Derived> &)
+synchronize_stream(execution_policy<Derived> &policy)
 {
-  #if __THRUST_HAS_CUDART__
-    cudaDeviceSynchronize();
-    return cudaGetLastError();
-  #else
-    return cudaSuccess;
-  #endif
+  cudaError_t result = cudaSuccess; // defensive default: every assignment below sits inside an #if
+  if (THRUST_IS_HOST_CODE) {
+    #if THRUST_INCLUDE_HOST_CODE
+      cudaStreamSynchronize(stream(policy)); // host: wait on the policy's stream only
+      result = cudaGetLastError();
+    #endif
+  } else {
+    #if THRUST_INCLUDE_DEVICE_CODE
+      #if __THRUST_HAS_CUDART__
+        THRUST_UNUSED_VAR(policy);
+        cudaDeviceSynchronize(); // device: the policy's stream is not consulted here
+        result = cudaGetLastError();
+      #else
+        THRUST_UNUSED_VAR(policy);
+        result = cudaSuccess;
+      #endif
+    #endif
+  }
+  return result;
 }
 
 // Entry point/interface.