Skip to content

Commit

Permalink
[Tensor reorder][Quality][#issue 1476] Improve naming style and CTest…
Browse files Browse the repository at this point in the history
… design (#1481)
  • Loading branch information
aska-0096 authored Apr 7, 2022
1 parent 2bf2666 commit fb4590d
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 329 deletions.
12 changes: 6 additions & 6 deletions src/conv/invokers/impl_gemm_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,9 +569,9 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory(
if(!trans_output_skippable)
opArgsTrans.emplace_back(trans_output.GetKernelArg());

trans_input_size = trans_input_skippable ? 0 : trans_input.GetSize();
trans_weight_size = trans_weight_skippable ? 0 : trans_weight.GetSize();
trans_output_size = trans_output_skippable ? 0 : trans_output.GetSize();
trans_input_size = trans_input_skippable ? 0 : trans_input.GetOutputTensorSize();
trans_weight_size = trans_weight_skippable ? 0 : trans_weight.GetOutputTensorSize();
trans_output_size = trans_output_skippable ? 0 : trans_output.GetOutputTensorSize();

int idx = 0;
if(!trans_input_skippable)
Expand Down Expand Up @@ -887,9 +887,9 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory(
if(!trans_output_skippable)
opArgsTrans.emplace_back(trans_output.GetKernelArg());

trans_input_size = trans_input_skippable ? 0 : trans_input.GetSize();
trans_weight_size = trans_weight_skippable ? 0 : trans_weight.GetSize();
trans_output_size = trans_output_skippable ? 0 : trans_output.GetSize();
trans_input_size = trans_input_skippable ? 0 : trans_input.GetOutputTensorSize();
trans_weight_size = trans_weight_skippable ? 0 : trans_weight.GetOutputTensorSize();
trans_output_size = trans_output_skippable ? 0 : trans_output.GetOutputTensorSize();

int idx = 0;
if(!trans_input_skippable)
Expand Down
8 changes: 5 additions & 3 deletions src/hip/batched_transpose_sol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ BatchedTransposeSolution::BatchedTransposeSolution(const ExecutionContext& ctx,
kernel_param_heuristic = batched_transpose::HeuristicGet(data_size, batch, height, width);
}

solver::KernelInfo BatchedTransposeSolution::GetKernel() const
solver::KernelInfo BatchedTransposeSolution::GetKernelInfo() const
{
std::size_t block_size = BATCHED_TRANSPOSE_BLOCK_SIZE;
#if BATCHED_TRANSPOSE_PERSISTENT
Expand All @@ -327,7 +327,7 @@ solver::KernelInfo BatchedTransposeSolution::GetKernel() const
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

MIOPEN_LOG_I2("BatchedTransposeSolution use kernel: " + kernel_name);
MIOPEN_LOG_T(kernel_name);

return kernel;
}
Expand All @@ -351,6 +351,8 @@ std::vector<OpKernelArg> BatchedTransposeSolution::GetKernelArg() const
opArgs.emplace_back(0); // placeholder
opArgs.emplace_back(height);
opArgs.emplace_back(width);
if(grid_size != static_cast<uint32_t>(grid_size))
MIOPEN_THROW("Variable grid size can't be casted to uint32_t safely");
opArgs.emplace_back(static_cast<uint32_t>(grid_size));
opArgs.emplace_back(dim_total);
opArgs.emplace_back(magic_h.magic);
Expand All @@ -374,7 +376,7 @@ bool BatchedTransposeSolution::IsSkippable() const
return height == 1 || width == 1;
}

size_t BatchedTransposeSolution::GetSize() const
size_t BatchedTransposeSolution::GetOutputTensorSize() const
{
return miopen::GetTypeSize(data_type) * batch * height * width;
}
Expand Down
65 changes: 33 additions & 32 deletions src/hip/general_tensor_reorder_sol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
namespace miopen {
namespace tensor_reorder {

static inline std::string GetNameTrait(std::size_t type_size)
static inline std::string GetKernelNameType(std::size_t type_size)
{
if(type_size == 1)
return "byte";
Expand All @@ -59,47 +59,45 @@ static inline std::string GetKernelName(std::size_t data_size,
uint32_t order_3,
const GeneralReorderParam* kparam)
{
if(kparam == nullptr)
MIOPEN_THROW("Memory access fault, kparam is a nullptr");
std::ostringstream kernel_name;
std::string type_trait = GetNameTrait(data_size);
kernel_name << "general_4d_reorder_" << kparam->tile_x << "x" << kparam->tile_y << "_";
if(!(kparam->pack_x == 1 && kparam->pack_y == 1 && kparam->ediv_x == 1 && kparam->ediv_y == 1))
{
kernel_name << "pack_" << kparam->pack_x << "x" << kparam->pack_y << "_ediv_"
<< kparam->ediv_x << "x" << kparam->ediv_y << "_";
}
kernel_name << type_trait << "_r" << order_0 << order_1 << order_2 << order_3;
kernel_name << GetKernelNameType(data_size) << "_r" << order_0 << order_1 << order_2 << order_3;
return kernel_name.str();
}

static inline GeneralReorderParam
HeuristicGet(std::size_t data_size, uint32_t dim_0, uint32_t dim_1, uint32_t dim_2, uint32_t dim_3)
{
/*
* TODO:
* Design an algorithm to determine general tensor reorder tile size.
*/
///\todo Design an algorithm to determine general tensor reorder tile size.
GeneralReorderParam default_kernel;
if(data_size <= 8 && dim_0 >= 1 && dim_1 >= 1 && dim_2 >= 1 && dim_3 >= 1)
{
if(dim_3 >= 16)
{
return GeneralReorderParam{16, 256, 1, 1, 1, 1};
return GeneralReorderParam{16, TENSOR_REORDER_BLOCK_SIZE, 1, 1, 1, 1};
}
else if(dim_3 >= 8)
{
return GeneralReorderParam{8, 256, 1, 1, 1, 1};
return GeneralReorderParam{8, TENSOR_REORDER_BLOCK_SIZE, 1, 1, 1, 1};
}
else if(dim_3 >= 4)
{
return GeneralReorderParam{4, 256, 1, 1, 1, 1};
return GeneralReorderParam{4, TENSOR_REORDER_BLOCK_SIZE, 1, 1, 1, 1};
}
else if(dim_3 >= 2)
{
return GeneralReorderParam{2, 256, 1, 1, 1, 1};
return GeneralReorderParam{2, TENSOR_REORDER_BLOCK_SIZE, 1, 1, 1, 1};
}
else
{
return GeneralReorderParam{1, 256, 1, 1, 1, 1};
return GeneralReorderParam{1, TENSOR_REORDER_BLOCK_SIZE, 1, 1, 1, 1};
}
}
else
Expand All @@ -109,16 +107,15 @@ HeuristicGet(std::size_t data_size, uint32_t dim_0, uint32_t dim_1, uint32_t dim
}

} // namespace tensor_reorder
GeneralReorderSolution::GeneralReorderSolution(const ExecutionContext& ctx,
miopenDataType_t data_type_,
uint32_t dim_0_,
uint32_t dim_1_,
uint32_t dim_2_,
uint32_t dim_3_,
uint32_t order_0_,
uint32_t order_1_,
uint32_t order_2_,
uint32_t order_3_)
GenericReorderSolutionImpl::GenericReorderSolutionImpl(miopenDataType_t data_type_,
uint32_t dim_0_,
uint32_t dim_1_,
uint32_t dim_2_,
uint32_t dim_3_,
uint32_t order_0_,
uint32_t order_1_,
uint32_t order_2_,
uint32_t order_3_)
: data_type(data_type_),
dim_0(dim_0_),
dim_1(dim_1_),
Expand All @@ -131,12 +128,11 @@ GeneralReorderSolution::GeneralReorderSolution(const ExecutionContext& ctx,
{
if(data_type == miopenInt8x4)
MIOPEN_THROW("These data type are not supported");
num_cu = ctx.GetStream().GetMaxComputeUnits();
std::size_t data_size = miopen::GetTypeSize(data_type);
kernel_param_heuristic = tensor_reorder::HeuristicGet(data_size, dim_0, dim_1, dim_2, dim_3);
}

solver::KernelInfo GeneralReorderSolution::GetKernel() const
solver::KernelInfo GenericReorderSolutionImpl::GetKernelInfo() const
{
std::size_t block_size = TENSOR_REORDER_BLOCK_SIZE;
uint32_t pixel_total = dim_0 * dim_1 * dim_2 * dim_3;
Expand All @@ -157,12 +153,12 @@ solver::KernelInfo GeneralReorderSolution::GetKernel() const
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

MIOPEN_LOG_I2("GeneralReorderSolution use kernel: " + kernel_name);
MIOPEN_LOG_T(kernel_name);

return kernel;
}

std::vector<OpKernelArg> GeneralReorderSolution::GetKernelArg() const
std::vector<OpKernelArg> GenericReorderSolutionImpl::GetKernelArg() const
{
std::size_t block_size = TENSOR_REORDER_BLOCK_SIZE;
uint32_t pixel_total = dim_0 * dim_1 * dim_2 * dim_3;
Expand All @@ -181,6 +177,8 @@ std::vector<OpKernelArg> GeneralReorderSolution::GetKernelArg() const
opArgs.emplace_back(dim_1);
opArgs.emplace_back(dim_2);
opArgs.emplace_back(dim_3);
if(grid_size != static_cast<uint32_t>(grid_size))
MIOPEN_THROW("Variable grid size can't be casted to uint32_t safely");
opArgs.emplace_back(static_cast<uint32_t>(grid_size));
opArgs.emplace_back(dim_total);
opArgs.emplace_back(magic_stride0.magic);
Expand All @@ -193,20 +191,23 @@ std::vector<OpKernelArg> GeneralReorderSolution::GetKernelArg() const
return opArgs;
}

std::string GeneralReorderSolution::GetKernelName() const
std::string GenericReorderSolutionImpl::GetKernelName() const
{
std::size_t data_size = miopen::GetTypeSize(data_type);
return tensor_reorder::GetKernelName(
data_size, order_0, order_1, order_2, order_3, &kernel_param_heuristic);
return tensor_reorder::GetKernelName(miopen::GetTypeSize(data_type),
order_0,
order_1,
order_2,
order_3,
&kernel_param_heuristic);
}

bool GeneralReorderSolution::IsSkippable() const
bool GenericReorderSolutionImpl::IsSkippable() const
{
// Disable the IsSkippable function
return dim_0 == 0 || dim_1 == 0 || dim_2 == 0 || dim_3 == 0;
}

size_t GeneralReorderSolution::GetSize() const
size_t GenericReorderSolutionImpl::GetOutputTensorSize() const
{
return miopen::GetTypeSize(data_type) * dim_0 * dim_1 * dim_2 * dim_3;
}
Expand Down
4 changes: 2 additions & 2 deletions src/include/miopen/batched_transpose_sol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ struct BatchedTransposeSolution
uint32_t batch_,
uint32_t height_,
uint32_t width_);
solver::KernelInfo GetKernel() const;
solver::KernelInfo GetKernelInfo() const;
std::vector<OpKernelArg> GetKernelArg() const;
std::string GetKernelName() const;
bool IsSkippable() const;
size_t GetSize() const;
size_t GetOutputTensorSize() const;

miopenDataType_t data_type;
uint32_t batch;
Expand Down
28 changes: 13 additions & 15 deletions src/include/miopen/general_tensor_reorder_sol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
#ifndef GUARD_GENERAL_MIOPEN_TENSOR_REORDER_SOL_HPP
#define GUARD_GENERAL_MIOPEN_TENSOR_REORDER_SOL_HPP

#include <miopen/miopen.h>
#include <miopen/kernel_info.hpp>
#include <miopen/op_kernel_args.hpp>
#include <miopen/execution_context.hpp>
#include <cstdint>
#include <vector>

namespace miopen {
Expand All @@ -44,24 +44,23 @@ struct GeneralReorderParam
int ediv_y{0};
};

struct GeneralReorderSolution
struct GenericReorderSolutionImpl
{
GeneralReorderSolution(const ExecutionContext& ctx_,
miopenDataType_t data_type_,
uint32_t dim_0_,
uint32_t dim_1_,
uint32_t dim_2_,
uint32_t dim_3_,
uint32_t order_0_,
uint32_t order_1_,
uint32_t order_2_,
uint32_t order_3_);
GenericReorderSolutionImpl(miopenDataType_t data_type_,
uint32_t dim_0_,
uint32_t dim_1_,
uint32_t dim_2_,
uint32_t dim_3_,
uint32_t order_0_,
uint32_t order_1_,
uint32_t order_2_,
uint32_t order_3_);
// TODO batched transpose API
solver::KernelInfo GetKernel() const;
solver::KernelInfo GetKernelInfo() const;
std::vector<OpKernelArg> GetKernelArg() const;
std::string GetKernelName() const;
bool IsSkippable() const;
size_t GetSize() const;
size_t GetOutputTensorSize() const;

miopenDataType_t data_type;
uint32_t dim_0;
Expand All @@ -72,7 +71,6 @@ struct GeneralReorderSolution
uint32_t order_1;
uint32_t order_2;
uint32_t order_3;
int num_cu;

GeneralReorderParam kernel_param_heuristic;
};
Expand Down
Loading

0 comments on commit fb4590d

Please sign in to comment.