Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #352 from allisonvacanti/test_spmv
Browse files Browse the repository at this point in the history
cub::DeviceSpmv: Various fixes and tests
  • Loading branch information
alliepiper authored Aug 17, 2021
2 parents 571aab9 + 91c6e25 commit f22ad19
Show file tree
Hide file tree
Showing 6 changed files with 668 additions and 47 deletions.
54 changes: 33 additions & 21 deletions cub/agent/agent_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
#include "../thread/thread_operators.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../iterator/counting_input_iterator.cuh"
#include "../iterator/tex_obj_input_iterator.cuh"

CUB_NAMESPACE_BEGIN

Expand Down Expand Up @@ -94,18 +93,16 @@ template <
typename OffsetT> ///< Signed integer type for sequence offsets
struct SpmvParams
{
ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values
OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>. (Indices are zero-valued.)
ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
const ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
const OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values
const OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>. (Indices are zero-valued.)
const ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector <em>y</em>
int num_rows; ///< Number of rows of matrix <b>A</b>.
int num_cols; ///< Number of columns of matrix <b>A</b>.
int num_nonzeros; ///< Number of nonzero elements of matrix <b>A</b>.
ValueT alpha; ///< Alpha multiplicand
ValueT beta; ///< Beta addend-multiplicand

TexObjInputIterator<ValueT, OffsetT> t_vector_x;
};


Expand Down Expand Up @@ -294,9 +291,12 @@ struct AgentSpmv
OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset;

// Gather the row end-offsets for the merge tile into shared memory
for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS)
for (int item = threadIdx.x; item < tile_num_rows + ITEMS_PER_THREAD; item += BLOCK_THREADS)
{
s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item];
const OffsetT offset =
(cub::min)(static_cast<OffsetT>(tile_start_coord.x + item),
static_cast<OffsetT>(spmv_params.num_rows - 1));
s_tile_row_end_offsets[item] = wd_row_end_offsets[offset];
}

CTA_SYNC();
Expand Down Expand Up @@ -328,10 +328,8 @@ struct AgentSpmv
OffsetT column_idx = wd_column_indices[nonzero_idx];
ValueT value = wd_values[nonzero_idx];

ValueT vector_value = spmv_params.t_vector_x[column_idx];
#if (CUB_PTX_ARCH >= 350)
vector_value = wd_vector_x[column_idx];
#endif
ValueT vector_value = wd_vector_x[column_idx];

ValueT nonzero = value * vector_value;

OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x];
Expand Down Expand Up @@ -437,8 +435,7 @@ struct AgentSpmv
OffsetT column_idx = *ci;
ValueT value = *a;

ValueT vector_value = spmv_params.t_vector_x[column_idx];
vector_value = wd_vector_x[column_idx];
ValueT vector_value = wd_vector_x[column_idx];

ValueT nonzero = value * vector_value;

Expand All @@ -464,10 +461,8 @@ struct AgentSpmv
OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx];
ValueT value = wd_values[tile_start_coord.y + nonzero_idx];

ValueT vector_value = spmv_params.t_vector_x[column_idx];
#if (CUB_PTX_ARCH >= 350)
vector_value = wd_vector_x[column_idx];
#endif
ValueT vector_value = wd_vector_x[column_idx];

ValueT nonzero = value * vector_value;

s_tile_nonzeros[nonzero_idx] = nonzero;
Expand All @@ -478,9 +473,12 @@ struct AgentSpmv

// Gather the row end-offsets for the merge tile into shared memory
#pragma unroll 1
for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS)
for (int item = threadIdx.x; item < tile_num_rows + ITEMS_PER_THREAD; item += BLOCK_THREADS)
{
s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item];
const OffsetT offset =
(cub::min)(static_cast<OffsetT>(tile_start_coord.x + item),
static_cast<OffsetT>(spmv_params.num_rows - 1));
s_tile_row_end_offsets[item] = wd_row_end_offsets[offset];
}

CTA_SYNC();
Expand Down Expand Up @@ -648,9 +646,23 @@ struct AgentSpmv
if (threadIdx.x == 0)
{
if (HAS_ALPHA)
{
tile_carry.value *= spmv_params.alpha;
}

tile_carry.key += tile_start_coord.x;
if (tile_carry.key >= spmv_params.num_rows)
{
// FIXME: This works around an invalid memory access in the
// fixup kernel. The underlying issue needs to be debugged and
// properly fixed, but this hack prevents writes to
// out-of-bounds addresses. It doesn't appear to have an effect
// on the validity of the results, since this only affects the
// carry-over from last tile in the input.
tile_carry.key = spmv_params.num_rows - 1;
tile_carry.value = ValueT{};
};

d_tile_carry_pairs[tile_idx] = tile_carry;
}
}
Expand Down
12 changes: 6 additions & 6 deletions cub/device/device_spmv.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ struct DeviceSpmv
static cudaError_t CsrMV(
void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation
ValueT* d_values, ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
int* d_row_offsets, ///< [in] Pointer to the array of \p m + 1 offsets demarcating the start of every row in \p d_column_indices and \p d_values (with the final entry being equal to \p num_nonzeros)
int* d_column_indices, ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>. (Indices are zero-valued.)
ValueT* d_vector_x, ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
const ValueT* d_values, ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
const int* d_row_offsets, ///< [in] Pointer to the array of \p m + 1 offsets demarcating the start of every row in \p d_column_indices and \p d_values (with the final entry being equal to \p num_nonzeros)
const int* d_column_indices, ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>. (Indices are zero-valued.)
const ValueT* d_vector_x, ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
ValueT* d_vector_y, ///< [out] Pointer to the array of \p num_rows values corresponding to the dense output vector <em>y</em>
int num_rows, ///< [in] number of rows of matrix <b>A</b>.
int num_cols, ///< [in] number of columns of matrix <b>A</b>.
Expand All @@ -148,8 +148,8 @@ struct DeviceSpmv
spmv_params.num_rows = num_rows;
spmv_params.num_cols = num_cols;
spmv_params.num_nonzeros = num_nonzeros;
spmv_params.alpha = 1.0;
spmv_params.beta = 0.0;
spmv_params.alpha = ValueT{1};
spmv_params.beta = ValueT{0};

return DispatchSpmv<ValueT, int>::Dispatch(
d_temp_storage,
Expand Down
31 changes: 15 additions & 16 deletions cub/device/dispatch/dispatch_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,21 @@ struct DispatchSpmv
cudaError error = cudaSuccess;
do
{
if (spmv_params.num_rows < 0 || spmv_params.num_cols < 0)
{
return cudaErrorInvalidValue;
}

if (spmv_params.num_rows == 0 || spmv_params.num_cols == 0)
{ // Empty problem, no-op.
if (d_temp_storage == NULL)
{
temp_storage_bytes = 1;
}

break;
}

if (spmv_params.num_cols == 1)
{
if (d_temp_storage == NULL)
Expand Down Expand Up @@ -603,14 +618,6 @@ struct DispatchSpmv
int search_block_size = INIT_KERNEL_THREADS;
int search_grid_size = cub::DivideAndRoundUp(num_merge_tiles + 1, search_block_size);

#if CUB_INCLUDE_HOST_CODE
if (CUB_IS_HOST_CODE)
{
// Init textures
if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x, spmv_params.num_cols * sizeof(ValueT)))) break;
}
#endif

if (search_grid_size < sm_count)
// if (num_merge_tiles < spmv_sm_occupancy * sm_count)
{
Expand Down Expand Up @@ -685,14 +692,6 @@ struct DispatchSpmv
// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
}

#if CUB_INCLUDE_HOST_CODE
if (CUB_IS_HOST_CODE)
{
// Free textures
if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
}
#endif
}
while (0);

Expand Down
2 changes: 1 addition & 1 deletion cub/iterator/tex_obj_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ public:
size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator
{
this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
this->tex_offset = tex_offset;
this->tex_offset = static_cast<difference_type>(tex_offset);

cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<TextureWord>();
cudaResourceDesc res_desc;
Expand Down
Loading

0 comments on commit f22ad19

Please sign in to comment.