From c4c9404e14a837857f834e94c7a3e34afaf558b7 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina
Date: Mon, 14 Mar 2022 08:58:29 -0500
Subject: [PATCH 1/2] Warn if using CUDA driver less than 11.5.0 with ASYNC

Signed-off-by: Alessandro Bellina
---
 .../com/nvidia/spark/rapids/RapidsConf.scala | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 8a0bce5bee1..652217647be 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -1492,14 +1492,23 @@ class RapidsConf(conf: Map[String, String]) extends Logging {
   lazy val isPooledMemEnabled: Boolean = get(POOLED_MEM)
 
   lazy val rmmPool: String = {
-    val pool = get(RMM_POOL)
-    if ("ASYNC".equalsIgnoreCase(pool) &&
-        (Cuda.getRuntimeVersion < 11020 || Cuda.getDriverVersion < 11020)) {
-      logWarning("CUDA runtime/driver does not support the ASYNC allocator, falling back to ARENA")
-      "ARENA"
-    } else {
-      pool
+    var pool = get(RMM_POOL)
+    val driverVersion = Cuda.getDriverVersion
+    val runtimeVersion = Cuda.getRuntimeVersion
+    var fallbackMessage: Option[String] = None
+    if ("ASYNC".equalsIgnoreCase(pool)) {
+      if (runtimeVersion < 11020 || driverVersion < 11020) {
+        fallbackMessage = Some("CUDA runtime/driver does not support the ASYNC allocator")
+      } else if (driverVersion < 11050) {
+        fallbackMessage = Some("CUDA drivers before 11.5 have known incompatibilities with " +
+          "the ASYNC allocator")
+      }
+      if (fallbackMessage.isDefined) {
+        logWarning(s"${fallbackMessage.get}, falling back to ARENA")
+        pool = "ARENA"
+      }
     }
+    pool
   }
 
   lazy val rmmAllocFraction: Double = get(RMM_ALLOC_FRACTION)

From 0569e805733da4d4e7e5a78240a78dd5edee14c1 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina
Date: Mon, 14 Mar 2022 09:51:46 -0500
Subject: [PATCH 2/2] Update
 sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala

Co-authored-by: Jason Lowe
---
 .../src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 652217647be..d1d38f6b3c6 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -1493,10 +1493,10 @@ class RapidsConf(conf: Map[String, String]) extends Logging {
 
   lazy val rmmPool: String = {
     var pool = get(RMM_POOL)
-    val driverVersion = Cuda.getDriverVersion
-    val runtimeVersion = Cuda.getRuntimeVersion
-    var fallbackMessage: Option[String] = None
     if ("ASYNC".equalsIgnoreCase(pool)) {
+      val driverVersion = Cuda.getDriverVersion
+      val runtimeVersion = Cuda.getRuntimeVersion
+      var fallbackMessage: Option[String] = None
       if (runtimeVersion < 11020 || driverVersion < 11020) {
         fallbackMessage = Some("CUDA runtime/driver does not support the ASYNC allocator")
       } else if (driverVersion < 11050) {