From 2c81bedb4584e60e3166eabdcb98eec2f1d18e47 Mon Sep 17 00:00:00 2001
From: MithunR <mythrocks@gmail.com>
Date: Fri, 1 Apr 2022 15:00:48 -0700
Subject: [PATCH] JNI Bindings to fetch CUDA compute capability versions.
 (#10568)

This commit introduces JNI bindings to retrieve the major and minor CUDA compute capability versions for the current CUDA device.

This feature enables introspection from `spark-rapids` to detect the GPU architecture, for model-specific behaviour.
This is required by NVIDIA/spark-rapids/pull/5122 to work around the erroneous behaviour of JNI `fixed_width_convert_to_rows()` on Pascal GPUs (#10569), which in turn produces failures like NVIDIA/spark-rapids/issues/4980.

Authors:
   - MithunR (https://github.com/mythrocks)

Approvers:
   - https://github.com/nvdbaranec
   - Jason Lowe (https://github.com/jlowe)
   - Nghia Truong (https://github.com/ttnghia)
---
 java/src/main/java/ai/rapids/cudf/Cuda.java | 38 ++++++++++++++++++++-
 java/src/main/native/src/CudaJni.cpp        | 30 +++++++++++++++-
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Cuda.java b/java/src/main/java/ai/rapids/cudf/Cuda.java
index 5e3722d50b7..21843527fc2 100755
--- a/java/src/main/java/ai/rapids/cudf/Cuda.java
+++ b/java/src/main/java/ai/rapids/cudf/Cuda.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -386,6 +386,42 @@ static void asyncMemcpy(long dst, long src, long count, CudaMemcpyKind kind) {
    */
   static native int getNativeComputeMode() throws CudaException;
 
+  /**
+   * Gets the major CUDA compute capability of the current device, e.g. 7 for capability 7.5.
+   * <p>
+   * For reference (https://developer.nvidia.com/cuda-gpus): <pre>
+   * Hardware Generation    Compute Capability
+   *     Ampere                 8.x
+   *     Turing                 7.5
+   *     Volta                  7.0, 7.2
+   *     Pascal                 6.x
+   *     Maxwell                5.x
+   *     Kepler                 3.x
+   *     Fermi                  2.x
+   * </pre>
+   * @return The major compute capability version number of the current CUDA device
+   * @throws CudaException on any error
+   */
+  public static native int getComputeCapabilityMajor() throws CudaException;  
+
+  /**
+   * Gets the minor CUDA compute capability of the current device, e.g. 5 for capability 7.5.
+   * <p>
+   * For reference (https://developer.nvidia.com/cuda-gpus): <pre>
+   * Hardware Generation    Compute Capability
+   *     Ampere                 8.x
+   *     Turing                 7.5
+   *     Volta                  7.0, 7.2
+   *     Pascal                 6.x
+   *     Maxwell                5.x
+   *     Kepler                 3.x
+   *     Fermi                  2.x
+   * </pre>
+   * @return The minor compute capability version number of the current CUDA device
+   * @throws CudaException on any error
+   */
+  public static native int getComputeCapabilityMinor() throws CudaException;
+
   /**
    * Calls cudaFree(0). This can be used to initialize the GPU after a setDevice()
    * @throws CudaException on any error
diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index e548b4ce65c..9862c3bface 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -195,6 +195,34 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv *env
   CATCH_STD(env, -2);
 }
 
+JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMajor(JNIEnv *env, jclass) {
+  try { // native side of ai.rapids.cudf.Cuda.getComputeCapabilityMajor()
+    cudf::jni::auto_set_device(env); // NOTE(review): presumably selects this thread's device - confirm
+    int device; // written by cudaGetDevice below before it is read
+    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    int attribute_value; // written by cudaDeviceGetAttribute below before it is returned
+    JNI_CUDA_TRY(
+        env, -2,
+        ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMajor, device));
+    return attribute_value; // major compute capability of `device`
+  }
+  CATCH_STD(env, -2); // NOTE(review): -2 is presumably the value returned on failure - confirm in macro
+}
+
+JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMinor(JNIEnv *env, jclass) {
+  try { // native side of ai.rapids.cudf.Cuda.getComputeCapabilityMinor()
+    cudf::jni::auto_set_device(env); // NOTE(review): presumably selects this thread's device - confirm
+    int device; // written by cudaGetDevice below before it is read
+    JNI_CUDA_TRY(env, -2, ::cudaGetDevice(&device));
+    int attribute_value; // written by cudaDeviceGetAttribute below before it is returned
+    JNI_CUDA_TRY(
+        env, -2,
+        ::cudaDeviceGetAttribute(&attribute_value, ::cudaDevAttrComputeCapabilityMinor, device));
+    return attribute_value; // minor compute capability of `device`
+  }
+  CATCH_STD(env, -2); // NOTE(review): -2 is presumably the value returned on failure - confirm in macro
+}
+
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_freeZero(JNIEnv *env, jclass) {
   try {
     cudf::jni::auto_set_device(env);