Skip to content

Commit

Permalink
[WORKAROUND] Generic-Reduction: Partially resolve SWDEV-291479. W/A f…
Browse files Browse the repository at this point in the history
…or SWDEV-274384. (#998)

* Add volatile to variable/parameter declarations to avoid the lost index-update issue caused by a compiler defect
* Adjust the input data generation methods for reduce_test
* Fix to the network config ID setting
* Revert "[HOTFIX][WORKAROUND] Disable test_reduce_double (SWDEV-291479) (#994)"
* Formalize workaround for SWDEV-274384
  • Loading branch information
qianfengz authored Jun 23, 2021
1 parent 0bb242a commit aabe099
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ struct binop_with_nan_check<NanPropagation_t::NOT_PROPAGATE_NAN, opReduce, compT
};

// The method is called when the opReduce is indexable and the user asked for indices
__device__ static inline void
calculate(const compType& accuVal, compType currVal, int& accuIndex, int currIndex)
__device__ static inline void calculate(const compType& accuVal,
compType currVal,
VOLATILE_WA_274384 int& accuIndex,
int currIndex)
{
bool changed = false;
VOLATILE_WA_274384 bool changed = false;

opReduce{}(const_cast<compType&>(accuVal), currVal, changed);

Expand All @@ -75,7 +77,7 @@ struct binop_with_nan_check<NanPropagation_t::PROPAGATE_NAN, opReduce, compType>

// The method is called when the opReduce is indexable and the user asked for indices
__device__ static inline void
calculate(compType& accuVal, compType currVal, int& accuIndex, int currIndex)
calculate(compType& accuVal, compType currVal, VOLATILE_WA_274384 int& accuIndex, int currIndex)
{
if(isnan(currVal))
{
Expand All @@ -84,7 +86,7 @@ struct binop_with_nan_check<NanPropagation_t::PROPAGATE_NAN, opReduce, compType>
}
else
{
bool changed = false;
VOLATILE_WA_274384 bool changed = false;

opReduce{}(accuVal, currVal, changed);

Expand Down Expand Up @@ -527,9 +529,9 @@ struct BlockwiseReduction_2d_block_buffer
compType& accuData,
int& accuIndex)
{
const index_t thread_local_id = get_thread_local_1d_id();
compType lAccuData = opReduce::GetZeroVal();
int lAccuIndex = 0;
const index_t thread_local_id = get_thread_local_1d_id();
compType lAccuData = opReduce::GetZeroVal();
VOLATILE_WA_274384 int lAccuIndex = 0;

static_if<blockIsOneRow>{}([&](auto) {
for(index_t otherDimInd = 0; otherDimInd < toReduceBlocks; otherDimInd++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@

#include "float_type.hpp"

// Workaround for SWDEV-274384: on affected HIP compiler versions, index
// updates in the reduction kernels can be lost unless the variables involved
// are declared 'volatile' (see the accompanying commit message). The version
// check gates the workaround to HIP packages >= 4.2.21203.
#define WORKAROUND_SWDEV_274384 (HIP_PACKAGE_VERSION_FLAT >= 4002021203ULL)

// VOLATILE_WA_274384 expands to 'volatile' only when the workaround is
// active, so unaffected compiler versions build the declarations unchanged.
#if WORKAROUND_SWDEV_274384
#define VOLATILE_WA_274384 volatile
#else
#define VOLATILE_WA_274384
#endif

// this enumerate should be synchronized with include/miopen/reduce_common.hpp
namespace ck {
enum class ReductionMethod_t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,13 @@ struct Max
a = b;
}

__device__ inline constexpr void operator()(T& a, T b, bool& changed) const
__device__ inline constexpr void operator()(T& a, T b, VOLATILE_WA_274384 bool& changed) const
{
if(a < b)
{
a = b;
changed = true;
}
else
changed = false;
}

static constexpr bool indexable = true;
Expand All @@ -120,15 +118,13 @@ struct Min
a = b;
}

__device__ inline constexpr void operator()(T& a, T b, bool& changed) const
__device__ inline constexpr void operator()(T& a, T b, VOLATILE_WA_274384 bool& changed) const
{
if(a > b)
{
a = b;
changed = true;
}
else
changed = false;
}

static constexpr bool indexable = true;
Expand Down
6 changes: 3 additions & 3 deletions src/reducetensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle,
std::to_string(compType) + "IN";
for(auto dimLen : inDescLengths)
network_config += std::to_string(dimLen) + "_";
network_config += "OUT";
for(auto dimLen : outDescLengths)
network_config += std::to_string(dimLen) + "_";
network_config += "RED";
for(auto dim : toReduceDims)
network_config += std::to_string(dim) + "_";
network_config += "BSIZE_" + std::to_string(blockSize);

// kernel for the first call
Expand Down
3 changes: 1 addition & 2 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1085,8 +1085,7 @@ if(MIOPEN_TEST_CONV)
endif()

if(MIOPEN_TEST_FLOAT)
# WORKAROUND_SWDEV_291479
# add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED COMMAND $<TARGET_FILE:test_reduce_test> --double --all --verbose)
add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED COMMAND $<TARGET_FILE:test_reduce_test> --double --all --verbose)
endif()

# Add here regression tests that should be run only on Vega10/20 and only with FP16.
Expand Down
52 changes: 40 additions & 12 deletions test/reduce_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,11 +820,21 @@ struct reduce_driver : test_driver
max_value =
miopen_type<T>{} == miopenHalf ? 13 : miopen_type<T>{} == miopenInt8 ? 127 : 17;

// default data generation (used by MIN/MAX)
auto gen_value = [&](auto... is) {
return (tensor_elem_gen_integer{max_value}(is...) *
tensor_elem_gen_checkboard_sign{}(is...));
};

// data generation used by ADD/AVG; the data is distributed around 1.0 rather than 0.0,
// so there is a very low probability that the reduced result is zero
auto gen_value_1 = [&](auto... is) {
auto rand_value = tensor_elem_gen_integer{max_value}(is...);
auto sign_value = tensor_elem_gen_checkboard_sign{}(is...);

return (sign_value * rand_value + 1.0);
};

// Special data generation for MUL, to avoid all-zero and large accumulative error in the
// reduced result
auto gen_value_2 = [&](auto... is) {
Expand All @@ -835,12 +845,7 @@ struct reduce_driver : test_driver
: (rand_value + max_value + 1) / (rand_value + max_value);
};

bool need_indices =
((reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX ||
reduceOp == MIOPEN_REDUCE_TENSOR_AMAX) &&
indicesOpt != MIOPEN_REDUCE_TENSOR_NO_INDICES);

// Special data generation for MIN/MAX/AMAX using a space of limitless number of values.
// Special data generation for NORM1 and NORM2 using a space of limitless number of values.
// This method is slower due to the use of rand(), it is usually used for manual testing
auto gen_value_3 = [&](auto... is) {
auto rand_upper = tensor_elem_gen_integer{max_value}(is...);
Expand All @@ -850,6 +855,14 @@ struct reduce_driver : test_driver
return rand_upper * sign_value * rand_ratio;
};

// Special data generation for AMAX, no zero value used
auto gen_value_4 = [&](auto... is) {
auto rand_value = tensor_elem_gen_integer{max_value}(is...);
auto sign_value = tensor_elem_gen_checkboard_sign{}(is...);

return sign_value > 0.0 ? (rand_value + 0.5) : (-1.0 * rand_value - 0.5);
};

// default tolerance (refer to driver.hpp)
this->tolerance = 80;

Expand All @@ -866,12 +879,27 @@ struct reduce_driver : test_driver
if(std::is_same<T, half_float::half>::value)
this->tolerance *= this->tolerance * 10.0;

auto inputTensor = (reduceOp == MIOPEN_REDUCE_TENSOR_MUL)
? tensor<T>{this->inLengths}.generate(gen_value_2)
: (need_indices || reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 ||
reduceOp == MIOPEN_REDUCE_TENSOR_NORM2
? tensor<T>{this->inLengths}.generate(gen_value_3)
: tensor<T>{this->inLengths}.generate(gen_value));
tensor<T> inputTensor;

switch(reduceOp)
{
case MIOPEN_REDUCE_TENSOR_ADD:
case MIOPEN_REDUCE_TENSOR_AVG:
inputTensor = tensor<T>{this->inLengths}.generate(gen_value_1);
break;
case MIOPEN_REDUCE_TENSOR_MUL:
inputTensor = tensor<T>{this->inLengths}.generate(gen_value_2);
break;
case MIOPEN_REDUCE_TENSOR_NORM1:
case MIOPEN_REDUCE_TENSOR_NORM2:
inputTensor = tensor<T>{this->inLengths}.generate(gen_value_3);
break;
case MIOPEN_REDUCE_TENSOR_AMAX:
inputTensor = tensor<T>{this->inLengths}.generate(gen_value_4);
break;
default: inputTensor = tensor<T>{this->inLengths}.generate(gen_value);
};

auto outputTensor = tensor<T>{outLengths};

std::fill(outputTensor.begin(), outputTensor.end(), convert_type<T>(0.0f));
Expand Down

0 comments on commit aabe099

Please sign in to comment.