diff --git a/cub/device/dispatch/dispatch_histogram.cuh b/cub/device/dispatch/dispatch_histogram.cuh index 879d5ddec2..6362d1ebd3 100644 --- a/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/device/dispatch/dispatch_histogram.cuh @@ -341,52 +341,6 @@ struct DipatchHistogram }; }; - - /// SM11 - struct Policy110 - { - // HistogramSweepPolicy - typedef AgentHistogramPolicy< - 512, - (NUM_CHANNELS == 1) ? 8 : 2, - BLOCK_LOAD_DIRECT, - LOAD_DEFAULT, - true, - GMEM, - false> - HistogramSweepPolicy; - }; - - /// SM20 - struct Policy200 - { - // HistogramSweepPolicy - typedef AgentHistogramPolicy< - (NUM_CHANNELS == 1) ? 256 : 128, - (NUM_CHANNELS == 1) ? 8 : 3, - (NUM_CHANNELS == 1) ? BLOCK_LOAD_DIRECT : BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - true, - SMEM, - false> - HistogramSweepPolicy; - }; - - /// SM30 - struct Policy300 - { - // HistogramSweepPolicy - typedef AgentHistogramPolicy< - 512, - (NUM_CHANNELS == 1) ? 8 : 2, - BLOCK_LOAD_DIRECT, - LOAD_DEFAULT, - true, - GMEM, - false> - HistogramSweepPolicy; - }; - /// SM35 struct Policy350 { @@ -426,17 +380,8 @@ struct DipatchHistogram #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; -#elif (CUB_PTX_ARCH >= 350) - typedef Policy350 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 300) - typedef Policy300 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 200) - typedef Policy200 PtxPolicy; - #else - typedef Policy110 PtxPolicy; + typedef Policy350 PtxPolicy; #endif @@ -473,21 +418,9 @@ struct DipatchHistogram { result = histogram_sweep_config.template Init(); } - else if (ptx_version >= 350) - { - result = histogram_sweep_config.template Init(); - } - else if (ptx_version >= 300) - { - result = histogram_sweep_config.template Init(); - } - else if (ptx_version >= 200) - { - result = histogram_sweep_config.template Init(); - } else { - result = histogram_sweep_config.template Init(); + result = histogram_sweep_config.template Init(); } #endif } diff --git a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh index 2b0919fa1c..bfe3b3d252 100644 --- a/cub/device/dispatch/dispatch_radix_sort.cuh +++ b/cub/device/dispatch/dispatch_radix_sort.cuh @@ -529,98 +529,8 @@ struct DeviceRadixSortPolicy // Architecture-specific tuning policies //------------------------------------------------------------------------------ - /// SM20 - struct Policy200 : ChainedPolicy<200, Policy200, Policy200> - { - enum { - PRIMARY_RADIX_BITS = 5, - ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, - - // Relative size of KeyT type to a 4-byte word - SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4, - }; - - // Keys-only upsweep policies - typedef AgentRadixSortUpsweepPolicy <64, 18, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; - typedef AgentRadixSortUpsweepPolicy <64, 18, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; - - // Key-value pairs upsweep policies - typedef AgentRadixSortUpsweepPolicy <128, 13, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; - typedef AgentRadixSortUpsweepPolicy <128, 13, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; - - // Upsweep policies - typedef typename If::Type UpsweepPolicy; - typedef typename If::Type AltUpsweepPolicy; - - // Scan policy - typedef AgentScanPolicy <512, 4, OffsetT, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; - - // Keys-only downsweep policies - typedef AgentRadixSortDownsweepPolicy <64, 18, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; - typedef AgentRadixSortDownsweepPolicy <64, 18, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; - - // Key-value pairs downsweep policies - typedef AgentRadixSortDownsweepPolicy <128, 13, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; - typedef AgentRadixSortDownsweepPolicy <128, 13, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; - - // Downsweep policies - typedef typename If::Type DownsweepPolicy; - typedef typename If::Type AltDownsweepPolicy; - - // Single-tile policy - typedef DownsweepPolicy SingleTilePolicy; - - // Segmented policies - typedef DownsweepPolicy SegmentedPolicy; - typedef AltDownsweepPolicy AltSegmentedPolicy; - }; - - /// SM30 - struct Policy300 : ChainedPolicy<300, Policy300, Policy200> - { - enum { - PRIMARY_RADIX_BITS = 5, - ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, - }; - - // Keys-only upsweep policies - typedef AgentRadixSortUpsweepPolicy <256, 7, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; - typedef AgentRadixSortUpsweepPolicy <256, 7, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; - - // Key-value pairs upsweep policies - typedef AgentRadixSortUpsweepPolicy <256, 5, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; - typedef AgentRadixSortUpsweepPolicy <256, 5, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; - - // Upsweep policies - typedef typename If::Type UpsweepPolicy; - typedef typename If::Type AltUpsweepPolicy; - - // Scan policy - typedef AgentScanPolicy <1024, 4, OffsetT, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; - - // Keys-only downsweep policies - typedef AgentRadixSortDownsweepPolicy <128, 14, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; - typedef AgentRadixSortDownsweepPolicy <128, 14, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; - - // Key-value pairs downsweep policies - typedef AgentRadixSortDownsweepPolicy <128, 10, DominantT, BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; - typedef AgentRadixSortDownsweepPolicy <128, 10, DominantT, BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; - - // Downsweep policies - typedef typename If::Type DownsweepPolicy; - typedef typename If::Type AltDownsweepPolicy; - - // Single-tile policy - typedef DownsweepPolicy SingleTilePolicy; - - // Segmented policies - typedef DownsweepPolicy SegmentedPolicy; - typedef AltDownsweepPolicy AltSegmentedPolicy; - }; - - /// SM35 - struct Policy350 : ChainedPolicy<350, Policy350, Policy300> + struct Policy350 : ChainedPolicy<350, Policy350, Policy350> { enum { PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, // 1.72B 32b keys/s, 1.17B 32b pairs/s, 1.55B 32b segmented keys/s (K40m) diff --git a/cub/device/dispatch/dispatch_reduce.cuh b/cub/device/dispatch/dispatch_reduce.cuh index c9a5e4fbe0..f6aee45b36 100644 --- a/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/device/dispatch/dispatch_reduce.cuh @@ -246,46 +246,8 @@ struct DeviceReducePolicy // Architecture-specific tuning policies //------------------------------------------------------------------------------ - /// SM13 - struct Policy130 : ChainedPolicy<130, Policy130, Policy130> - { - // ReducePolicy - typedef AgentReducePolicy< - 128, 8, InputT, ///< Threads per block, items per thread, compute type - 2, ///< Number of items per vectorized load - BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use - LOAD_DEFAULT> ///< Cache load modifier - ReducePolicy; - - // SingleTilePolicy - typedef ReducePolicy SingleTilePolicy; - - // SegmentedReducePolicy - typedef ReducePolicy SegmentedReducePolicy; - }; - - - /// SM20 - struct Policy200 : ChainedPolicy<200, Policy200, Policy130> - { - // ReducePolicy (GTX 580: 178.9 GB/s @ 48M 4B items, 158.1 GB/s @ 192M 1B items) - typedef AgentReducePolicy< - 128, 8, InputT, ///< Threads per block, items per thread, compute type - 4, ///< Number of items per vectorized load - BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use - LOAD_DEFAULT> ///< Cache load modifier - ReducePolicy; - - // SingleTilePolicy - typedef ReducePolicy SingleTilePolicy; - - // SegmentedReducePolicy - typedef ReducePolicy SegmentedReducePolicy; - }; - - /// SM30 - struct Policy300 : ChainedPolicy<300, Policy300, Policy200> + struct Policy300 : ChainedPolicy<300, Policy300, Policy300> { // ReducePolicy (GTX670: 154.0 @ 48M 4B items) typedef AgentReducePolicy< diff --git a/cub/device/dispatch/dispatch_reduce_by_key.cuh b/cub/device/dispatch/dispatch_reduce_by_key.cuh index d8d8dcac41..09b531e081 100644 --- a/cub/device/dispatch/dispatch_reduce_by_key.cuh +++ b/cub/device/dispatch/dispatch_reduce_by_key.cuh @@ -179,96 +179,12 @@ struct DispatchReduceByKey ReduceByKeyPolicyT; }; - /// SM30 - struct Policy300 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 6, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), - }; - - typedef AgentReduceByKeyPolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - ReduceByKeyPolicyT; - }; - - /// SM20 - struct Policy200 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 11, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), - }; - - typedef AgentReduceByKeyPolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - ReduceByKeyPolicyT; - }; - - /// SM13 - struct Policy130 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), - }; - - typedef AgentReduceByKeyPolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - ReduceByKeyPolicyT; - }; - - /// SM11 - struct Policy110 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 5, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 8) / COMBINED_INPUT_BYTES)), - }; - - typedef AgentReduceByKeyPolicy< - 64, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_RAKING> - ReduceByKeyPolicyT; - }; - - /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ -#if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; -#elif (CUB_PTX_ARCH >= 300) - typedef Policy300 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 200) - typedef Policy200 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 130) - typedef Policy130 PtxPolicy; - -#else - typedef Policy110 PtxPolicy; - -#endif - // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxReduceByKeyPolicy : PtxPolicy::ReduceByKeyPolicyT {}; @@ -298,26 +214,10 @@ struct DispatchReduceByKey { #if CUB_INCLUDE_HOST_CODE // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version - if (ptx_version >= 350) - { - reduce_by_key_config.template Init(); - } - else if (ptx_version >= 300) - { - reduce_by_key_config.template Init(); - } - else if (ptx_version >= 200) - { - reduce_by_key_config.template Init(); - } - else if (ptx_version >= 130) - { - reduce_by_key_config.template Init(); - } - else - { - reduce_by_key_config.template Init(); - } + + // (There's only one policy right now) + (void)ptx_version; + reduce_by_key_config.template Init(); #endif } } diff --git a/cub/device/dispatch/dispatch_rle.cuh b/cub/device/dispatch/dispatch_rle.cuh index 68f8871516..c4b11038b5 100644 --- a/cub/device/dispatch/dispatch_rle.cuh +++ b/cub/device/dispatch/dispatch_rle.cuh @@ -165,100 +165,12 @@ struct DeviceRleDispatch RleSweepPolicy; }; - /// SM30 - struct Policy300 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 5, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef AgentRlePolicy< - 256, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - true, - BLOCK_SCAN_RAKING_MEMOIZE> - RleSweepPolicy; - }; - - /// SM20 - struct Policy200 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 15, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef AgentRlePolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - false, - BLOCK_SCAN_WARP_SCANS> - RleSweepPolicy; - }; - - /// SM13 - struct Policy130 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef AgentRlePolicy< - 64, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - true, - BLOCK_SCAN_RAKING_MEMOIZE> - RleSweepPolicy; - }; - - /// SM10 - struct Policy100 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef AgentRlePolicy< - 256, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - true, - BLOCK_SCAN_RAKING_MEMOIZE> - RleSweepPolicy; - }; - - /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ -#if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; -#elif (CUB_PTX_ARCH >= 300) - typedef Policy300 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 200) - typedef Policy200 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 130) - typedef Policy130 PtxPolicy; - -#else - typedef Policy100 PtxPolicy; - -#endif - // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxRleSweepPolicy : PtxPolicy::RleSweepPolicy {}; @@ -286,26 +198,10 @@ struct DeviceRleDispatch { #if CUB_INCLUDE_HOST_CODE // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version - if (ptx_version >= 350) - { - device_rle_config.template Init(); - } - else if (ptx_version >= 300) - { - device_rle_config.template Init(); - } - else if (ptx_version >= 200) - { - device_rle_config.template Init(); - } - else if (ptx_version >= 130) - { - device_rle_config.template Init(); - } - else - { - device_rle_config.template Init(); - } + + // (There's only one policy right now) + (void)ptx_version; + device_rle_config.template Init(); #endif } } diff --git a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh index 833de674a2..bdb1da6d8d 100644 --- a/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/device/dispatch/dispatch_scan.cuh @@ -140,61 +140,8 @@ template < struct DeviceScanPolicy { - /// SM10 - struct Policy100 : ChainedPolicy<100, Policy100, Policy100> - { - typedef AgentScanPolicy< - 64, 9, ///< Threads per block, items per thread - OutputT, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_STORE_WARP_TRANSPOSE, - BLOCK_SCAN_WARP_SCANS> - ScanPolicyT; - }; - - /// SM13 - struct Policy130 : ChainedPolicy<130, Policy130, Policy100> - { - typedef AgentScanPolicy< - 96, 21, ///< Threads per block, items per thread - OutputT, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_STORE_WARP_TRANSPOSE, - BLOCK_SCAN_RAKING_MEMOIZE> - ScanPolicyT; - }; - - /// SM20 - struct Policy200 : ChainedPolicy<200, Policy200, Policy130> - { - // GTX 580: 20.3B items/s (162.3 GB/s) @ 48M 32-bit T - typedef AgentScanPolicy< - 128, 12, ///< Threads per block, items per thread - OutputT, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_STORE_WARP_TRANSPOSE, - BLOCK_SCAN_WARP_SCANS> - ScanPolicyT; - }; - - /// SM30 - struct Policy300 : ChainedPolicy<300, Policy300, Policy200> - { - typedef AgentScanPolicy< - 256, 9, ///< Threads per block, items per thread - OutputT, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_STORE_WARP_TRANSPOSE, - BLOCK_SCAN_WARP_SCANS> - ScanPolicyT; - }; - /// SM35 - struct Policy350 : ChainedPolicy<350, Policy350, Policy300> + struct Policy350 : ChainedPolicy<350, Policy350, Policy350> { // GTX Titan: 29.5B items/s (232.4 GB/s) @ 48M 32-bit T typedef AgentScanPolicy< diff --git a/cub/device/dispatch/dispatch_select_if.cuh b/cub/device/dispatch/dispatch_select_if.cuh index 5fec4cff72..a1d8c453f0 100644 --- a/cub/device/dispatch/dispatch_select_if.cuh +++ b/cub/device/dispatch/dispatch_select_if.cuh @@ -170,96 +170,12 @@ struct DispatchSelectIf SelectIfPolicyT; }; - /// SM30 - struct Policy300 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), - }; - - typedef AgentSelectIfPolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - SelectIfPolicyT; - }; - - /// SM20 - struct Policy200 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = (KEEP_REJECTS) ? 7 : 15, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), - }; - - typedef AgentSelectIfPolicy< - 128, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - SelectIfPolicyT; - }; - - /// SM13 - struct Policy130 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), - }; - - typedef AgentSelectIfPolicy< - 64, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_RAKING_MEMOIZE> - SelectIfPolicyT; - }; - - /// SM10 - struct Policy100 - { - enum { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), - }; - - typedef AgentSelectIfPolicy< - 64, - ITEMS_PER_THREAD, - BLOCK_LOAD_WARP_TRANSPOSE, - LOAD_DEFAULT, - BLOCK_SCAN_RAKING> - SelectIfPolicyT; - }; - - /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ -#if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; -#elif (CUB_PTX_ARCH >= 300) - typedef Policy300 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 200) - typedef Policy200 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 130) - typedef Policy130 PtxPolicy; - -#else - typedef Policy100 PtxPolicy; - -#endif - // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSelectIfPolicyT : PtxPolicy::SelectIfPolicyT {}; @@ -288,26 +204,10 @@ struct DispatchSelectIf { #if CUB_INCLUDE_HOST_CODE // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version - if (ptx_version >= 350) - { - select_if_config.template Init(); - } - else if (ptx_version >= 300) - { - select_if_config.template Init(); - } - else if (ptx_version >= 200) - { - select_if_config.template Init(); - } - else if (ptx_version >= 130) - { - select_if_config.template Init(); - } - else - { - select_if_config.template Init(); - } + + // (There's only one policy right now) + (void)ptx_version; + select_if_config.template Init(); #endif } } diff --git a/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/device/dispatch/dispatch_spmv_orig.cuh index fb431df2cb..a5095daf19 100644 --- a/cub/device/dispatch/dispatch_spmv_orig.cuh +++ b/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -264,83 +264,6 @@ struct DispatchSpmv // Tuning policies //--------------------------------------------------------------------- - /// SM11 - struct Policy110 - { - typedef AgentSpmvPolicy< - 128, - 1, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - false, - BLOCK_SCAN_WARP_SCANS> - SpmvPolicyT; - - typedef AgentSegmentFixupPolicy< - 128, - 4, - BLOCK_LOAD_VECTORIZE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - SegmentFixupPolicyT; - }; - - /// SM20 - struct Policy200 - { - typedef AgentSpmvPolicy< - 96, - 18, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - false, - BLOCK_SCAN_RAKING> - SpmvPolicyT; - - typedef AgentSegmentFixupPolicy< - 128, - 4, - BLOCK_LOAD_VECTORIZE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - SegmentFixupPolicyT; - - }; - - - - /// SM30 - struct Policy300 - { - typedef AgentSpmvPolicy< - 96, - 6, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - LOAD_DEFAULT, - false, - BLOCK_SCAN_WARP_SCANS> - SpmvPolicyT; - - typedef AgentSegmentFixupPolicy< - 128, - 4, - BLOCK_LOAD_VECTORIZE, - LOAD_DEFAULT, - BLOCK_SCAN_WARP_SCANS> - SegmentFixupPolicyT; - - }; - - /// SM35 struct Policy350 { @@ -457,17 +380,8 @@ struct DispatchSpmv #elif (CUB_PTX_ARCH >= 370) typedef Policy370 PtxPolicy; -#elif (CUB_PTX_ARCH >= 350) - typedef Policy350 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 300) - typedef Policy300 PtxPolicy; - -#elif (CUB_PTX_ARCH >= 200) - typedef Policy200 PtxPolicy; - #else - typedef Policy110 PtxPolicy; + typedef Policy350 PtxPolicy; #endif @@ -517,26 +431,11 @@ struct DispatchSpmv spmv_config.template Init(); segment_fixup_config.template Init(); } - else if (ptx_version >= 350) + else { spmv_config.template Init(); segment_fixup_config.template Init(); } - else if (ptx_version >= 300) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else if (ptx_version >= 200) - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } - else - { - spmv_config.template Init(); - segment_fixup_config.template Init(); - } #endif } }