Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Remove policies for arch < sm35. #213

Merged
merged 1 commit into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 2 additions & 69 deletions cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -341,52 +341,6 @@ struct DipatchHistogram
};
};


/// SM11 tuning policy: defines HistogramSweepPolicy as a fixed AgentHistogramPolicy instantiation.
/// Only the second template argument depends on NUM_CHANNELS.
/// NOTE(review): the meaning of each positional argument comes from the AgentHistogramPolicy
/// declaration, which is not visible here; the labels below are assumptions to confirm.
struct Policy110
{
// HistogramSweepPolicy
typedef AgentHistogramPolicy<
512, // presumably threads per block -- TODO confirm
(NUM_CHANNELS == 1) ? 8 : 2, // 8 for single-channel input, 2 otherwise; presumably per-thread work -- TODO confirm
BLOCK_LOAD_DIRECT,
LOAD_DEFAULT,
true,
GMEM,
false>
HistogramSweepPolicy;
};

/// SM20 tuning policy: defines HistogramSweepPolicy as an AgentHistogramPolicy instantiation
/// whose first three template arguments are chosen by whether NUM_CHANNELS equals 1.
/// NOTE(review): the meaning of each positional argument comes from the AgentHistogramPolicy
/// declaration, which is not visible here; the labels below are assumptions to confirm.
struct Policy200
{
// HistogramSweepPolicy
typedef AgentHistogramPolicy<
(NUM_CHANNELS == 1) ? 256 : 128, // 256 for single-channel input, 128 otherwise; presumably threads per block -- TODO confirm
(NUM_CHANNELS == 1) ? 8 : 3, // 8 for single-channel input, 3 otherwise; presumably per-thread work -- TODO confirm
(NUM_CHANNELS == 1) ? BLOCK_LOAD_DIRECT : BLOCK_LOAD_WARP_TRANSPOSE, // direct load for single-channel input, warp-transpose load otherwise
LOAD_DEFAULT,
true,
SMEM,
false>
HistogramSweepPolicy;
};

/// SM30 tuning policy: defines HistogramSweepPolicy as a fixed AgentHistogramPolicy instantiation.
/// Only the second template argument depends on NUM_CHANNELS.
/// NOTE(review): the meaning of each positional argument comes from the AgentHistogramPolicy
/// declaration, which is not visible here; the labels below are assumptions to confirm.
struct Policy300
{
// HistogramSweepPolicy
typedef AgentHistogramPolicy<
512, // presumably threads per block -- TODO confirm
(NUM_CHANNELS == 1) ? 8 : 2, // 8 for single-channel input, 2 otherwise; presumably per-thread work -- TODO confirm
BLOCK_LOAD_DIRECT,
LOAD_DEFAULT,
true,
GMEM,
false>
HistogramSweepPolicy;
};

/// SM35
struct Policy350
{
Expand Down Expand Up @@ -426,17 +380,8 @@ struct DipatchHistogram
#if (CUB_PTX_ARCH >= 500)
typedef Policy500 PtxPolicy;

#elif (CUB_PTX_ARCH >= 350)
typedef Policy350 PtxPolicy;

#elif (CUB_PTX_ARCH >= 300)
typedef Policy300 PtxPolicy;

#elif (CUB_PTX_ARCH >= 200)
typedef Policy200 PtxPolicy;

#else
typedef Policy110 PtxPolicy;
typedef Policy350 PtxPolicy;

#endif

Expand Down Expand Up @@ -473,21 +418,9 @@ struct DipatchHistogram
{
result = histogram_sweep_config.template Init<typename Policy500::HistogramSweepPolicy>();
}
else if (ptx_version >= 350)
{
result = histogram_sweep_config.template Init<typename Policy350::HistogramSweepPolicy>();
}
else if (ptx_version >= 300)
{
result = histogram_sweep_config.template Init<typename Policy300::HistogramSweepPolicy>();
}
else if (ptx_version >= 200)
{
result = histogram_sweep_config.template Init<typename Policy200::HistogramSweepPolicy>();
}
else
{
result = histogram_sweep_config.template Init<typename Policy110::HistogramSweepPolicy>();
result = histogram_sweep_config.template Init<typename Policy350::HistogramSweepPolicy>();
}
#endif
}
Expand Down
92 changes: 1 addition & 91 deletions cub/device/dispatch/dispatch_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -529,98 +529,8 @@ struct DeviceRadixSortPolicy
// Architecture-specific tuning policies
//------------------------------------------------------------------------------

/// SM20 radix-sort tuning policy.
/// Declares upsweep, scan and downsweep agent policies, each in a "primary" and an "alternate"
/// variant that differ only in the radix-bits argument, and picks the keys-only or key-value
/// flavour of each through If<KEYS_ONLY, ...>.
/// The third ChainedPolicy argument names this same struct.
/// NOTE(review): presumably that marks the end of the policy chain -- confirm against ChainedPolicy.
struct Policy200 : ChainedPolicy<200, Policy200, Policy200>
{
enum {
PRIMARY_RADIX_BITS = 5,
ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, // one bit fewer than the primary pass (4)

// Number of 4-byte words needed to hold the larger of KeyT and ValueT, rounded up.
// Not referenced by any other declaration in this struct.
SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4,
};

// Keys-only upsweep policies (primary and alternate radix bits)
typedef AgentRadixSortUpsweepPolicy <64, 18, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys;
typedef AgentRadixSortUpsweepPolicy <64, 18, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys;

// Key-value pairs upsweep policies (primary and alternate radix bits)
typedef AgentRadixSortUpsweepPolicy <128, 13, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs;
typedef AgentRadixSortUpsweepPolicy <128, 13, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs;

// Upsweep policies: keys-only flavour when KEYS_ONLY is true, pairs flavour otherwise
typedef typename If<KEYS_ONLY, UpsweepPolicyKeys, UpsweepPolicyPairs>::Type UpsweepPolicy;
typedef typename If<KEYS_ONLY, AltUpsweepPolicyKeys, AltUpsweepPolicyPairs>::Type AltUpsweepPolicy;

// Scan policy (a single variant, shared by keys-only and pairs sorting)
typedef AgentScanPolicy <512, 4, OffsetT, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy;

// Keys-only downsweep policies (same leading 64, 18 arguments as the keys-only upsweep)
typedef AgentRadixSortDownsweepPolicy <64, 18, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys;
typedef AgentRadixSortDownsweepPolicy <64, 18, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys;

// Key-value pairs downsweep policies (same leading 128, 13 arguments as the pairs upsweep)
typedef AgentRadixSortDownsweepPolicy <128, 13, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs;
typedef AgentRadixSortDownsweepPolicy <128, 13, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs;

// Downsweep policies: keys-only flavour when KEYS_ONLY is true, pairs flavour otherwise
typedef typename If<KEYS_ONLY, DownsweepPolicyKeys, DownsweepPolicyPairs>::Type DownsweepPolicy;
typedef typename If<KEYS_ONLY, AltDownsweepPolicyKeys, AltDownsweepPolicyPairs>::Type AltDownsweepPolicy;

// Single-tile policy: reuses the selected downsweep policy
typedef DownsweepPolicy SingleTilePolicy;

// Segmented policies: reuse the selected primary and alternate downsweep policies
typedef DownsweepPolicy SegmentedPolicy;
typedef AltDownsweepPolicy AltSegmentedPolicy;
};

/// SM30 radix-sort tuning policy.
/// Declares upsweep, scan and downsweep agent policies, each in a "primary" and an "alternate"
/// variant that differ only in the radix-bits argument, and picks the keys-only or key-value
/// flavour of each through If<KEYS_ONLY, ...>.
/// The third ChainedPolicy argument is Policy200.
/// NOTE(review): presumably the policy used for architectures below 300 -- confirm against ChainedPolicy.
struct Policy300 : ChainedPolicy<300, Policy300, Policy200>
{
enum {
PRIMARY_RADIX_BITS = 5,
ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, // one bit fewer than the primary pass (4)
};

// Keys-only upsweep policies (primary and alternate radix bits)
typedef AgentRadixSortUpsweepPolicy <256, 7, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys;
typedef AgentRadixSortUpsweepPolicy <256, 7, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys;

// Key-value pairs upsweep policies (primary and alternate radix bits)
typedef AgentRadixSortUpsweepPolicy <256, 5, DominantT, LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs;
typedef AgentRadixSortUpsweepPolicy <256, 5, DominantT, LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs;

// Upsweep policies: keys-only flavour when KEYS_ONLY is true, pairs flavour otherwise
typedef typename If<KEYS_ONLY, UpsweepPolicyKeys, UpsweepPolicyPairs>::Type UpsweepPolicy;
typedef typename If<KEYS_ONLY, AltUpsweepPolicyKeys, AltUpsweepPolicyPairs>::Type AltUpsweepPolicy;

// Scan policy (a single variant, shared by keys-only and pairs sorting)
typedef AgentScanPolicy <1024, 4, OffsetT, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy;

// Keys-only downsweep policies (loaded with BLOCK_LOAD_WARP_TRANSPOSE)
typedef AgentRadixSortDownsweepPolicy <128, 14, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys;
typedef AgentRadixSortDownsweepPolicy <128, 14, DominantT, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys;

// Key-value pairs downsweep policies (note: BLOCK_LOAD_TRANSPOSE here, unlike the keys-only variants above)
typedef AgentRadixSortDownsweepPolicy <128, 10, DominantT, BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs;
typedef AgentRadixSortDownsweepPolicy <128, 10, DominantT, BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs;

// Downsweep policies: keys-only flavour when KEYS_ONLY is true, pairs flavour otherwise
typedef typename If<KEYS_ONLY, DownsweepPolicyKeys, DownsweepPolicyPairs>::Type DownsweepPolicy;
typedef typename If<KEYS_ONLY, AltDownsweepPolicyKeys, AltDownsweepPolicyPairs>::Type AltDownsweepPolicy;

// Single-tile policy: reuses the selected downsweep policy
typedef DownsweepPolicy SingleTilePolicy;

// Segmented policies: reuse the selected primary and alternate downsweep policies
typedef DownsweepPolicy SegmentedPolicy;
typedef AltDownsweepPolicy AltSegmentedPolicy;
};


/// SM35
struct Policy350 : ChainedPolicy<350, Policy350, Policy300>
struct Policy350 : ChainedPolicy<350, Policy350, Policy350>
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, // 1.72B 32b keys/s, 1.17B 32b pairs/s, 1.55B 32b segmented keys/s (K40m)
Expand Down
40 changes: 1 addition & 39 deletions cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -246,46 +246,8 @@ struct DeviceReducePolicy
// Architecture-specific tuning policies
//------------------------------------------------------------------------------

/// SM13 reduction tuning policy.
/// Defines one AgentReducePolicy (128 threads per block, 8 items per thread, 2 items per
/// vectorized load, raking block reduction, default load modifier) and reuses it unchanged
/// for the single-tile and segmented cases.
/// The third ChainedPolicy argument names this same struct.
/// NOTE(review): presumably that marks the end of the policy chain -- confirm against ChainedPolicy.
struct Policy130 : ChainedPolicy<130, Policy130, Policy130>
{
// ReducePolicy
typedef AgentReducePolicy<
128, 8, InputT, ///< Threads per block, items per thread, compute type
2, ///< Number of items per vectorized load
BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use
LOAD_DEFAULT> ///< Cache load modifier
ReducePolicy;

// SingleTilePolicy (alias of ReducePolicy)
typedef ReducePolicy SingleTilePolicy;

// SegmentedReducePolicy (alias of ReducePolicy)
typedef ReducePolicy SegmentedReducePolicy;
};


/// SM20 reduction tuning policy.
/// Defines one AgentReducePolicy (128 threads per block, 8 items per thread, 4 items per
/// vectorized load, raking block reduction, default load modifier) and reuses it unchanged
/// for the single-tile and segmented cases.
/// The third ChainedPolicy argument is Policy130.
/// NOTE(review): presumably the policy used for architectures below 200 -- confirm against ChainedPolicy.
struct Policy200 : ChainedPolicy<200, Policy200, Policy130>
{
// ReducePolicy (GTX 580: 178.9 GB/s @ 48M 4B items, 158.1 GB/s @ 192M 1B items)
typedef AgentReducePolicy<
128, 8, InputT, ///< Threads per block, items per thread, compute type
4, ///< Number of items per vectorized load
BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use
LOAD_DEFAULT> ///< Cache load modifier
ReducePolicy;

// SingleTilePolicy (alias of ReducePolicy)
typedef ReducePolicy SingleTilePolicy;

// SegmentedReducePolicy (alias of ReducePolicy)
typedef ReducePolicy SegmentedReducePolicy;
};


/// SM30
struct Policy300 : ChainedPolicy<300, Policy300, Policy200>
struct Policy300 : ChainedPolicy<300, Policy300, Policy300>
{
// ReducePolicy (GTX670: 154.0 @ 48M 4B items)
typedef AgentReducePolicy<
Expand Down
108 changes: 4 additions & 104 deletions cub/device/dispatch/dispatch_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,96 +179,12 @@ struct DispatchReduceByKey
ReduceByKeyPolicyT;
};

/// SM30 reduce-by-key tuning policy.
/// Derives ITEMS_PER_THREAD from a nominal count of 6 and instantiates AgentReduceByKeyPolicy with it.
/// NOTE(review): COMBINED_INPUT_BYTES is defined outside this struct; presumably the combined
/// size of one key and one value -- confirm.
struct Policy300
{
enum {
NOMINAL_4B_ITEMS_PER_THREAD = 6,
// ceil(NOMINAL * 8 / COMBINED_INPUT_BYTES), clamped to the range [1, NOMINAL]
ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)),
};

typedef AgentReduceByKeyPolicy<
128, // presumably threads per block -- TODO confirm against AgentReduceByKeyPolicy
ITEMS_PER_THREAD,
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_SCAN_WARP_SCANS>
ReduceByKeyPolicyT;
};

/// SM20 reduce-by-key tuning policy.
/// Derives ITEMS_PER_THREAD from a nominal count of 11 and instantiates AgentReduceByKeyPolicy with it.
/// NOTE(review): COMBINED_INPUT_BYTES is defined outside this struct; presumably the combined
/// size of one key and one value -- confirm.
struct Policy200
{
enum {
NOMINAL_4B_ITEMS_PER_THREAD = 11,
// ceil(NOMINAL * 8 / COMBINED_INPUT_BYTES), clamped to the range [1, NOMINAL]
ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)),
};

typedef AgentReduceByKeyPolicy<
128, // presumably threads per block -- TODO confirm against AgentReduceByKeyPolicy
ITEMS_PER_THREAD,
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_SCAN_WARP_SCANS>
ReduceByKeyPolicyT;
};

/// SM13 reduce-by-key tuning policy.
/// Derives ITEMS_PER_THREAD from a nominal count of 7 and instantiates AgentReduceByKeyPolicy with it.
/// NOTE(review): COMBINED_INPUT_BYTES is defined outside this struct; presumably the combined
/// size of one key and one value -- confirm.
struct Policy130
{
enum {
NOMINAL_4B_ITEMS_PER_THREAD = 7,
// ceil(NOMINAL * 8 / COMBINED_INPUT_BYTES), clamped to the range [1, NOMINAL]
ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)),
};

typedef AgentReduceByKeyPolicy<
128, // presumably threads per block -- TODO confirm against AgentReduceByKeyPolicy
ITEMS_PER_THREAD,
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_SCAN_WARP_SCANS>
ReduceByKeyPolicyT;
};

/// SM11 reduce-by-key tuning policy.
/// Derives ITEMS_PER_THREAD from a nominal count of 5 and instantiates AgentReduceByKeyPolicy
/// with 64 as the first argument and BLOCK_SCAN_RAKING as the scan algorithm.
/// NOTE(review): COMBINED_INPUT_BYTES is defined outside this struct; presumably the combined
/// size of one key and one value -- confirm.
struct Policy110
{
enum {
NOMINAL_4B_ITEMS_PER_THREAD = 5,
// floor(NOMINAL * 8 / COMBINED_INPUT_BYTES), clamped to the range [1, NOMINAL].
// NOTE(review): this division truncates, with no "+ COMBINED_INPUT_BYTES - 1" round-up term -- confirm that is intended.
ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 8) / COMBINED_INPUT_BYTES)),
};

typedef AgentReduceByKeyPolicy<
64, // presumably threads per block -- TODO confirm against AgentReduceByKeyPolicy
ITEMS_PER_THREAD,
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_SCAN_RAKING>
ReduceByKeyPolicyT;
};


/******************************************************************************
* Tuning policies of current PTX compiler pass
******************************************************************************/

// Alias PtxPolicy to the policy with the highest threshold that CUB_PTX_ARCH meets.
// Thresholds are tested in descending order (350, 300, 200, 130); any lower value uses Policy110.
#if (CUB_PTX_ARCH >= 350)
typedef Policy350 PtxPolicy;

#elif (CUB_PTX_ARCH >= 300)
typedef Policy300 PtxPolicy;

#elif (CUB_PTX_ARCH >= 200)
typedef Policy200 PtxPolicy;

#elif (CUB_PTX_ARCH >= 130)
typedef Policy130 PtxPolicy;

#else
// Fallback for every CUB_PTX_ARCH value below 130
typedef Policy110 PtxPolicy;

#endif

// "Opaque" policies (whose parameterizations aren't reflected in the type signature)
struct PtxReduceByKeyPolicy : PtxPolicy::ReduceByKeyPolicyT {};

Expand Down Expand Up @@ -298,26 +214,10 @@ struct DispatchReduceByKey
{
#if CUB_INCLUDE_HOST_CODE
// We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version
if (ptx_version >= 350)
{
reduce_by_key_config.template Init<typename Policy350::ReduceByKeyPolicyT>();
}
else if (ptx_version >= 300)
{
reduce_by_key_config.template Init<typename Policy300::ReduceByKeyPolicyT>();
}
else if (ptx_version >= 200)
{
reduce_by_key_config.template Init<typename Policy200::ReduceByKeyPolicyT>();
}
else if (ptx_version >= 130)
{
reduce_by_key_config.template Init<typename Policy130::ReduceByKeyPolicyT>();
}
else
{
reduce_by_key_config.template Init<typename Policy110::ReduceByKeyPolicyT>();
}

// (There's only one policy right now)
(void)ptx_version;
reduce_by_key_config.template Init<typename Policy350::ReduceByKeyPolicyT>();
#endif
}
}
Expand Down
Loading