From 611f54dce73a8e42c575e7ae2f95b1d729085179 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina
Date: Mon, 14 Mar 2022 12:12:23 -0500
Subject: [PATCH] Fallback to ARENA if ASYNC configured and driver < 11.5.0 (#4947)

* Warn if using CUDA driver less than 11.5.0 with ASYNC

Signed-off-by: Alessandro Bellina

* Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala

Co-authored-by: Jason Lowe

Co-authored-by: Jason Lowe
---
 .../com/nvidia/spark/rapids/RapidsConf.scala | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 8a0bce5bee1..d1d38f6b3c6 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -1492,14 +1492,23 @@ class RapidsConf(conf: Map[String, String]) extends Logging {
   lazy val isPooledMemEnabled: Boolean = get(POOLED_MEM)
 
   lazy val rmmPool: String = {
-    val pool = get(RMM_POOL)
-    if ("ASYNC".equalsIgnoreCase(pool) &&
-        (Cuda.getRuntimeVersion < 11020 || Cuda.getDriverVersion < 11020)) {
-      logWarning("CUDA runtime/driver does not support the ASYNC allocator, falling back to ARENA")
-      "ARENA"
-    } else {
-      pool
+    var pool = get(RMM_POOL)
+    if ("ASYNC".equalsIgnoreCase(pool)) {
+      val driverVersion = Cuda.getDriverVersion
+      val runtimeVersion = Cuda.getRuntimeVersion
+      var fallbackMessage: Option[String] = None
+      if (runtimeVersion < 11020 || driverVersion < 11020) {
+        fallbackMessage = Some("CUDA runtime/driver does not support the ASYNC allocator")
+      } else if (driverVersion < 11050) {
+        fallbackMessage = Some("CUDA drivers before 11.5 have known incompatibilities with " +
+          "the ASYNC allocator")
+      }
+      if (fallbackMessage.isDefined) {
+        logWarning(s"${fallbackMessage.get}, falling back to ARENA")
+        pool = "ARENA"
+      }
     }
+    pool
   }
 
   lazy val rmmAllocFraction: Double = get(RMM_ALLOC_FRACTION)