Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CubDebug #430

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,13 @@ struct ScanTileState<T, false>
allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for inclusives

// Compute allocation pointers into the single storage blob
if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break;
error = CubDebug(
AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes));

if (cudaSuccess != error)
{
break;
}

// Alias the offsets
d_tile_status = reinterpret_cast<StatusWord*>(allocations[0]);
Expand Down
28 changes: 16 additions & 12 deletions cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ struct DispatchAdjacentDifference : public SelectedPolicy
void *allocations[1] = {nullptr};
std::size_t allocation_sizes[1] = {MayAlias * first_tile_previous_size};

if (CubDebug(error = AliasTemporaries(d_temp_storage,
temp_storage_bytes,
allocations,
allocation_sizes)))
error = CubDebug(
AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes));

if (cudaSuccess != error)
{
break;
}
Expand Down Expand Up @@ -278,15 +278,16 @@ struct DispatchAdjacentDifference : public SelectedPolicy
num_tiles,
tile_size);

error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

if (CubDebug(error))
if (cudaSuccess != error)
{
break;
}

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError()))
error = CubDebug(cudaPeekAtLastError());
if (cudaSuccess != error)
{
break;
}
Expand Down Expand Up @@ -319,15 +320,16 @@ struct DispatchAdjacentDifference : public SelectedPolicy
difference_op,
num_items);

error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

if (CubDebug(error))
if (cudaSuccess != error)
{
break;
}

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError()))
error = CubDebug(cudaPeekAtLastError());
if (cudaSuccess != error)
{
break;
}
Expand All @@ -352,7 +354,8 @@ struct DispatchAdjacentDifference : public SelectedPolicy
{
// Get PTX version
int ptx_version = 0;
if (CubDebug(error = PtxVersion(ptx_version)))
error = CubDebug(PtxVersion(ptx_version));
if (cudaSuccess != error)
{
break;
}
Expand All @@ -367,7 +370,8 @@ struct DispatchAdjacentDifference : public SelectedPolicy
stream);

// Dispatch to chained policy
if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch)))
error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
if (cudaSuccess != error)
{
break;
}
Expand Down
63 changes: 36 additions & 27 deletions cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -470,13 +470,14 @@ struct DispatchBatchMemcpy : SelectedPolicy

std::size_t buffer_offset_scan_storage = 0;
std::size_t blev_block_scan_storage = 0;
CubDebug(error = BLevBufferOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
error = CubDebug(BLevBufferOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
buffer_offset_scan_storage));
if (error)
{
return error;
}
CubDebug(error = BLevBlockOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),

error = CubDebug(BLevBlockOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
blev_block_scan_storage));
if (error)
{
Expand Down Expand Up @@ -504,8 +505,8 @@ struct DispatchBatchMemcpy : SelectedPolicy
}

// Alias memory buffers into the storage blob
if (CubDebug(
error = temporary_storage_layout.map_to_buffer(d_temp_storage, temp_storage_bytes)))
error = CubDebug(temporary_storage_layout.map_to_buffer(d_temp_storage, temp_storage_bytes));
if (cudaSuccess != error)
{
return error;
}
Expand Down Expand Up @@ -551,25 +552,26 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Get device ordinal
int device_ordinal;
if (CubDebug(error = cudaGetDevice(&device_ordinal)))
error = CubDebug(cudaGetDevice(&device_ordinal));
if (cudaSuccess != error)
{
return error;
}

// Get SM count
int sm_count;
if (CubDebug(error = cudaDeviceGetAttribute(&sm_count,
cudaDevAttrMultiProcessorCount,
device_ordinal)))
error =
CubDebug(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal));
if (cudaSuccess != error)
{
return error;
}

// Get SM occupancy for the batch memcpy block-level buffers kernel
int batch_memcpy_blev_occupancy;
if (CubDebug(error = MaxSmOccupancy(batch_memcpy_blev_occupancy,
multi_block_memcpy_kernel,
BLEV_BLOCK_THREADS)))
error = CubDebug(
MaxSmOccupancy(batch_memcpy_blev_occupancy, multi_block_memcpy_kernel, BLEV_BLOCK_THREADS));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -579,18 +581,20 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Construct the tile status for the buffer prefix sum
BLevBufferOffsetTileState buffer_scan_tile_state;
if (CubDebug(error = buffer_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_buffer_scan_alloc.get(),
buffer_offset_scan_storage)))
error = CubDebug(buffer_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_buffer_scan_alloc.get(),
buffer_offset_scan_storage));
if (cudaSuccess != error)
{
return error;
}

// Construct the tile status for thread blocks-to-buffer-assignment prefix sum
BLevBlockOffsetTileState block_scan_tile_state;
if (CubDebug(error = block_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_block_scan_alloc.get(),
blev_block_scan_storage)))
error = CubDebug(block_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_block_scan_alloc.get(),
blev_block_scan_storage));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -612,16 +616,17 @@ struct DispatchBatchMemcpy : SelectedPolicy
.doit(init_scan_states_kernel, buffer_scan_tile_state, block_scan_tile_state, num_tiles);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

// Check for errors reported by the stream sync
if (CubDebug(error))
if (cudaSuccess != error)
{
return error;
}
Expand Down Expand Up @@ -654,14 +659,15 @@ struct DispatchBatchMemcpy : SelectedPolicy
block_scan_tile_state);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
if (CubDebug(error))
error = CubDebug(detail::DebugSyncStream(stream));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -687,13 +693,14 @@ struct DispatchBatchMemcpy : SelectedPolicy
batch_memcpy_grid_size - 1);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

return error;
}
Expand All @@ -718,7 +725,8 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Get PTX version
int ptx_version = 0;
if (CubDebug(error = PtxVersion(ptx_version)))
error = CubDebug(PtxVersion(ptx_version));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -733,7 +741,8 @@ struct DispatchBatchMemcpy : SelectedPolicy
stream);

// Dispatch to chained policy
if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch)))
error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
if (cudaSuccess != error)
{
return error;
}
Expand Down
Loading