From 71d90262c271f9cf7d6430a66bda8463992bfe4f Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Wed, 14 Apr 2021 18:37:17 -0400 Subject: [PATCH] Remove CUB_PTX_ARCH, etc from cub::DeviceSpmv. `cub::DeviceSpmv` has not been ported to use cub::detail::ptx_dispatch and uses an older dispatch mechanism that is incompatible with nvc++. It was not ported because there are no tests and thus no way to verify whether or not the ported version works. Rather than risk breaking user code, we'll port this later, after we add tests. NV HPC doesn't require this algorithm so it's low priority for now. Tracked by NVIDIA/cub#285. --- cub/agent/agent_spmv_orig.cuh | 6 +- cub/device/dispatch/dispatch_spmv_orig.cuh | 115 +++++++++++---------- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/cub/agent/agent_spmv_orig.cuh b/cub/agent/agent_spmv_orig.cuh index 810f893fbe..d50e6463a1 100644 --- a/cub/agent/agent_spmv_orig.cuh +++ b/cub/agent/agent_spmv_orig.cuh @@ -420,7 +420,8 @@ struct AgentSpmv int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; -#if (CUB_PTX_ARCH >= 520) +// Not porting these for NVC++; see note in DispatchSpmv in the "__NVCOMPILER_CUDA__" block. 
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 520) OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; @@ -469,9 +470,8 @@ struct AgentSpmv ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; -#if (CUB_PTX_ARCH >= 350) + vector_value = wd_vector_x[column_idx]; -#endif ValueT nonzero = value * vector_value; s_tile_nonzeros[nonzero_idx] = nonzero; diff --git a/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/device/dispatch/dispatch_spmv_orig.cuh index e821ff23e3..f588940c0e 100644 --- a/cub/device/dispatch/dispatch_spmv_orig.cuh +++ b/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -372,19 +372,36 @@ struct DispatchSpmv // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- -#if (CUB_PTX_ARCH >= 600) - typedef Policy600 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 500) - typedef Policy500 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 370) - typedef Policy370 PtxPolicy; - -#else +#ifdef __NVCOMPILER_CUDA__ + + // cub::DeviceSpmv has not been ported to use cub::detail::ptx_dispatch + // and uses an older dispatch mechanism that is incompatible with nvc++. + // + // It was not ported because there are no tests and thus no way to verify + // whether or not the ported version works. Rather than risk breaking + // user code, we'll port this later, after we add tests. NV HPC doesn't + // require this algorithm so it's low priority for now. Tracked by + // NVIDIA/cub#285. 
+ // + // This will fail to compile if the Spmv algorithm is instantiated on nvc++, + // but will still build if the header is just included: + template struct dependent_false : std::false_type {}; + static_assert(dependent_false::value, + "cub::DeviceSpmv is not supported on nvc++."); + // Define this so the headers still parse: typedef Policy350 PtxPolicy; -#endif +#else + #if (__CUDA_ARCH__ >= 600) + typedef Policy600 PtxPolicy; + #elif (__CUDA_ARCH__ >= 500) + typedef Policy500 PtxPolicy; + #elif (__CUDA_ARCH__ >= 370) + typedef Policy370 PtxPolicy; + #else + typedef Policy350 PtxPolicy; + #endif +#endif // __NVCOMPILER_CUDA__ // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; @@ -405,40 +422,34 @@ struct DispatchSpmv KernelConfig &spmv_config, KernelConfig &segment_fixup_config) { - if (CUB_IS_DEVICE_CODE) +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifdef __CUDA_ARCH__ + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + spmv_config.template Init(); + segment_fixup_config.template Init(); +#else + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 600) { - #if CUB_INCLUDE_DEVICE_CODE - // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy - spmv_config.template Init(); - segment_fixup_config.template Init(); - #endif + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 500) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 370) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); } else { - #if CUB_INCLUDE_HOST_CODE - // We're on the host, so lookup and initialize the kernel dispatch configurations with the 
policies that match the device's PTX version - if (ptx_version >= 600) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else if (ptx_version >= 500) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else if (ptx_version >= 370) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - #endif + spmv_config.template Init(); + segment_fixup_config.template Init(); } +#endif } @@ -607,13 +618,11 @@ struct DispatchSpmv int search_block_size = INIT_KERNEL_THREADS; int search_grid_size = cub::DivideAndRoundUp(num_merge_tiles + 1, search_block_size); - #if CUB_INCLUDE_HOST_CODE - if (CUB_IS_HOST_CODE) - { - // Init textures - if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; - } - #endif +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifndef __CUDA_ARCH__ + // Init textures + if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; +#endif if (search_grid_size < sm_count) // if (num_merge_tiles < spmv_sm_occupancy * sm_count) @@ -690,13 +699,11 @@ struct DispatchSpmv if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } - #if CUB_INCLUDE_HOST_CODE - if (CUB_IS_HOST_CODE) - { - // Free textures - if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; - } - #endif +// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block. +#ifndef __CUDA_ARCH__ + // Free textures + if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; +#endif } while (0);