Skip to content

Commit

Permalink
Use new cub::DivideAndRoundUp util to avoid overflow errors.
Browse files Browse the repository at this point in the history
The expression `(n + d - 1) / d` can overflow while computing the
numerator `n + d - 1`. The new `cub::DivideAndRoundUp` utility avoids that overflow.

See NVIDIA/cub#221 for reference.
  • Loading branch information
alliepiper committed Feb 10, 2021
1 parent 52299b5 commit 000fa48
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 11 deletions.
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include <thrust/detail/mpl/math.h>
#include <thrust/detail/minmax.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{

Expand Down Expand Up @@ -394,7 +396,7 @@ namespace __adjacent_difference {


Size tile_size = difference_plan.items_per_tile;
-Size num_tiles = (num_items + tile_size - 1) / tile_size;
+Size num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t tmp1 = num_tiles * sizeof(input_type);
size_t vshmem_size = core::vshmem_size(difference_plan.shared_memory_size,
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{
// XXX declare generic copy_if interface
Expand Down Expand Up @@ -636,7 +638,7 @@ namespace __copy_if {
typename get_plan<copy_if_agent>::type copy_if_plan = copy_if_agent::get_plan(stream);

int tile_size = copy_if_plan.items_per_tile;
-size_t num_tiles = (num_items + tile_size - 1) / tile_size;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(copy_if_plan.shared_memory_size,
num_tiles);
Expand Down
5 changes: 3 additions & 2 deletions thrust/system/cuda/detail/extrema.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <thrust/pair.h>
#include <thrust/distance.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{
namespace cuda_cub {
Expand Down Expand Up @@ -259,8 +261,7 @@ namespace __extrema {
else if (reduce_plan.grid_mapping == cub::GRID_MAPPING_DYNAMIC)
{
// Work is distributed dynamically
-size_t num_tiles = (num_items + reduce_plan.items_per_tile - 1) /
-reduce_plan.items_per_tile;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, reduce_plan.items_per_tile);

// if not enough to fill the device with threadblocks
// then fill the device with threadblocks
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include <thrust/pair.h>
#include <thrust/distance.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{
namespace cuda_cub {
Expand Down Expand Up @@ -645,7 +647,7 @@ namespace __partition {
typename get_plan<partition_agent>::type partition_plan = partition_agent::get_plan(stream);

int tile_size = partition_plan.items_per_tile;
-size_t num_tiles = (num_items + tile_size - 1) / tile_size;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_storage = core::vshmem_size(partition_plan.shared_memory_size,
num_tiles);
Expand Down
5 changes: 3 additions & 2 deletions thrust/system/cuda/detail/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{

Expand Down Expand Up @@ -802,8 +804,7 @@ namespace __reduce {
else if (reduce_plan.grid_mapping == cub::GRID_MAPPING_DYNAMIC)
{
// Work is distributed dynamically
-size_t num_tiles = (num_items + reduce_plan.items_per_tile - 1) /
-reduce_plan.items_per_tile;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, reduce_plan.items_per_tile);

// if not enough to fill the device with threadblocks
// then fill the device with threadblocks
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/reduce_by_key.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{

Expand Down Expand Up @@ -909,7 +911,7 @@ namespace __reduce_by_key {

// Number of input tiles
int tile_size = reduce_by_key_plan.items_per_tile;
-Size num_tiles = (num_items + tile_size - 1) / tile_size;
+Size num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(reduce_by_key_plan.shared_memory_size,
num_tiles);
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/scan_by_key.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
#include <thrust/detail/minmax.h>
#include <thrust/distance.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{
namespace cuda_cub {
Expand Down Expand Up @@ -670,7 +672,7 @@ namespace __scan_by_key {
AgentPlan init_plan = init_agent::get_plan();

int tile_size = scan_by_key_plan.items_per_tile;
-size_t num_tiles = (num_items + tile_size - 1) / tile_size;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(scan_by_key_plan.shared_memory_size,
num_tiles);
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/unique.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include <thrust/detail/minmax.h>
#include <thrust/distance.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{

Expand Down Expand Up @@ -578,7 +580,7 @@ namespace __unique {


int tile_size = unique_plan.items_per_tile;
-size_t num_tiles = (num_items + tile_size - 1) / tile_size;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(unique_plan.shared_memory_size,
num_tiles);
Expand Down
4 changes: 3 additions & 1 deletion thrust/system/cuda/detail/unique_by_key.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>

+#include <cub/util_math.cuh>
+
namespace thrust
{

Expand Down Expand Up @@ -660,7 +662,7 @@ namespace __unique_by_key {


int tile_size = unique_plan.items_per_tile;
-size_t num_tiles = (num_items + tile_size - 1) / tile_size;
+size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(unique_plan.shared_memory_size,
num_tiles);
Expand Down

0 comments on commit 000fa48

Please sign in to comment.