From 71d90262c271f9cf7d6430a66bda8463992bfe4f Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Wed, 14 Apr 2021 18:37:17 -0400 Subject: [PATCH] Remove CUB_PTX_ARCH, etc from cub::DeviceSpmv. `cub::DeviceSpmv` has not been ported to use cub::detail::ptx_dispatch and uses an older dispatch mechanism that is incompatible with nvc++. It was not ported because there are no tests and thus no way to verify whether or not the ported version works. Rather than risk breaking user code, we'll port this later, after we add tests. NV HPC doesn't require this algorithm so it's low priority for now. Tracked by NVIDIA/cub#285. --- cub/agent/agent_spmv_orig.cuh | 6 +- cub/device/dispatch/dispatch_spmv_orig.cuh | 115 +++++++++++---------- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/cub/agent/agent_spmv_orig.cuh b/cub/agent/agent_spmv_orig.cuh index 810f893fbe..d50e6463a1 100644 --- a/cub/agent/agent_spmv_orig.cuh +++ b/cub/agent/agent_spmv_orig.cuh @@ -420,7 +420,8 @@ struct AgentSpmv int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; -#if (CUB_PTX_ARCH >= 520) +// Not porting these for NVC++; see note in DispatchSpmv in the "__NVCOMPILER_CUDA__" block. 
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 520) OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; @@ -469,9 +470,8 @@ struct AgentSpmv ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; -#if (CUB_PTX_ARCH >= 350) + vector_value = wd_vector_x[column_idx]; -#endif ValueT nonzero = value * vector_value; s_tile_nonzeros[nonzero_idx] = nonzero; diff --git a/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/device/dispatch/dispatch_spmv_orig.cuh index e821ff23e3..f588940c0e 100644 --- a/cub/device/dispatch/dispatch_spmv_orig.cuh +++ b/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -372,19 +372,36 @@ struct DispatchSpmv // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- -#if (CUB_PTX_ARCH >= 600) - typedef Policy600 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 500) - typedef Policy500 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 370) - typedef Policy370 PtxPolicy; - -#else +#ifdef __NVCOMPILER_CUDA__ + + // cub::DeviceSpmv has not been ported to use cub::detail::ptx_dispatch + // and uses an older dispatch mechanism that is incompatible with nvc++. + // + // It was not ported because there are no tests and thus no way to verify + // whether or not the ported version works. Rather than risk breaking + // user code, we'll port this later, after we add tests. NV HPC doesn't + // require this algorithm so it's low priority for now. Tracked by + // NVIDIA/cub#285. 
+ // + // This will fail to compile if the Spmv algorithm is instantiated on nvc++, + // but will still build if the header is just included: + template struct dependent_false : std::false_type {}; + static_assert(dependent_false::value, + "cub::DeviceSpmv is not supported on nvc++."); + // Define this so the headers still parse: typedef Policy350 PtxPolicy; -#endif +#else + #if (__CUDA_ARCH__ >= 600) + typedef Policy600 PtxPolicy; + #elif (__CUDA_ARCH__ >= 500) + typedef Policy500 PtxPolicy; + #elif (__CUDA_ARCH__ >= 370) + typedef Policy370 PtxPolicy; + #else + typedef Policy350 PtxPolicy; + #endif +#endif // __NVCOMPILER_CUDA__ // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; @@ -405,40 +422,34 @@ struct DispatchSpmv KernelConfig &spmv_config, KernelConfig &segment_fixup_config) { - if (CUB_IS_DEVICE_CODE) +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifdef __CUDA_ARCH__ + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + spmv_config.template Init(); + segment_fixup_config.template Init(); +#else + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 600) { - #if CUB_INCLUDE_DEVICE_CODE - // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy - spmv_config.template Init(); - segment_fixup_config.template Init(); - #endif + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 500) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 370) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); } else { - #if CUB_INCLUDE_HOST_CODE - // We're on the host, so lookup and initialize the kernel dispatch configurations with the 
policies that match the device's PTX version - if (ptx_version >= 600) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else if (ptx_version >= 500) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else if (ptx_version >= 370) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - #endif + spmv_config.template Init(); + segment_fixup_config.template Init(); } +#endif } @@ -607,13 +618,11 @@ struct DispatchSpmv int search_block_size = INIT_KERNEL_THREADS; int search_grid_size = cub::DivideAndRoundUp(num_merge_tiles + 1, search_block_size); - #if CUB_INCLUDE_HOST_CODE - if (CUB_IS_HOST_CODE) - { - // Init textures - if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; - } - #endif +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifndef __CUDA_ARCH__ + // Init textures + if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; +#endif if (search_grid_size < sm_count) // if (num_merge_tiles < spmv_sm_occupancy * sm_count) @@ -690,13 +699,11 @@ struct DispatchSpmv if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } - #if CUB_INCLUDE_HOST_CODE - if (CUB_IS_HOST_CODE) - { - // Free textures - if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; - } - #endif +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifndef __CUDA_ARCH__ + // Free textures + if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; +#endif } while (0);