From b48b801201ea43254b5e3f9550656f6fa7c4adbe Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Fri, 22 Nov 2024 07:31:01 -0800 Subject: [PATCH] remove extra sync, and make sure copyNext is always synchronous with the cuda stream --- .../com/nvidia/spark/rapids/spill/SpillFramework.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/spill/SpillFramework.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/spill/SpillFramework.scala index 58f39ec56ee..5e62e908d53 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/spill/SpillFramework.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/spill/SpillFramework.scala @@ -205,9 +205,8 @@ object SpillableHostBufferHandle extends Logging { while (chunkedPacker.hasNext) { withResource(chunkedPacker.next(bb)) { n => builder.copyNext(n, Cuda.DEFAULT_STREAM) - // we are calling chunked packer on `bb` again each time, we need - // to synchronize before we ask for the next chunk - Cuda.DEFAULT_STREAM.sync() + // copyNext is synchronous w.r.t. the cuda stream passed, + // no need to synchronize here. } } } @@ -221,7 +220,6 @@ object SpillableHostBufferHandle extends Logging { withResource( SpillFramework.stores.hostStore.makeBuilder(handle)) { builder => builder.copyNext(buff, Cuda.DEFAULT_STREAM) - Cuda.DEFAULT_STREAM.sync() builder.build } } @@ -1163,7 +1161,7 @@ class SpillableHostStore(val maxSize: Option[Long] = None) override def copyNext(mb: DeviceMemoryBuffer, stream: Cuda.Stream): Unit = { GpuTaskMetrics.get.spillToHostTime { - singleShotBuffer.copyFromMemoryBufferAsync( + singleShotBuffer.copyFromMemoryBuffer( copied, mb, 0,