Skip to content

Commit

Permalink
[API] NCHWc tensor layout support (#1532)
Browse files Browse the repository at this point in the history
  • Loading branch information
aska-0096 authored May 27, 2022
1 parent f3b4f18 commit 93d1476
Show file tree
Hide file tree
Showing 24 changed files with 920 additions and 108 deletions.
88 changes: 62 additions & 26 deletions driver/conv_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ class ConvDriver : public Driver
// defined in MIOpen lib.
// layout_type - input value supplied with MIOpen driver command.
void ValidateLayoutInputParameters(std::string layout_type);
void ValidateVectorizedParameters(int vector_dim, int vector_length);

// Helper function to check the Layout type short names
// Short names are defined as I,O,f. W.r.t In/Out/fil layout
Expand Down Expand Up @@ -456,6 +457,7 @@ int ConvDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
{
std::string in_layoutValue = inflags.GetValueStr("in_layout");
ValidateLayoutInputParameters(in_layoutValue);
inflags.SetValue("in_layout", in_layoutValue);
}
// fil layout argument value check
if(inflags.GetValueStr("fil_layout").empty())
Expand All @@ -466,6 +468,7 @@ int ConvDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
{
std::string fil_layoutValue = inflags.GetValueStr("fil_layout");
ValidateLayoutInputParameters(fil_layoutValue);
inflags.SetValue("fil_layout", fil_layoutValue);
}
// out layout argument check
if(inflags.GetValueStr("out_layout").empty())
Expand All @@ -476,7 +479,24 @@ int ConvDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
{
std::string out_layoutValue = inflags.GetValueStr("out_layout");
ValidateLayoutInputParameters(out_layoutValue);
inflags.SetValue("out_layout", out_layoutValue);
}

// vectorized tensor Dimension & Length check
int vector_dim = inflags.GetValueInt("tensor_vect");
int vector_length = inflags.GetValueInt("vector_length");

ValidateVectorizedParameters(vector_dim, vector_length);
if(vector_length != 1 && vector_dim == 1)
{
inflags.SetValue("in_layout",
inflags.GetValueStr("in_layout") + "c" + std::to_string(vector_length));
inflags.SetValue("fil_layout",
inflags.GetValueStr("fil_layout") + "c" + std::to_string(vector_length));
inflags.SetValue("out_layout",
inflags.GetValueStr("out_layout") + "c" + std::to_string(vector_length));
}

num_iterations = inflags.GetValueInt("iter");
if(num_iterations < 1)
{
Expand Down Expand Up @@ -538,7 +558,8 @@ void ConvDriver<Tgpu, Tref>::ValidateLayoutInputParameters(std::string layout_va
else
{
if((layout_value.compare("NCHW") == 0) || (layout_value.compare("NHWC") == 0) ||
(layout_value.compare("NCDHW") == 0) || (layout_value.compare("NDHWC") == 0))
(layout_value.compare("CHWN") == 0) || (layout_value.compare("NCDHW") == 0) ||
(layout_value.compare("NDHWC") == 0))
{
// do nothing,Values are matching as defined in Lib.
}
Expand All @@ -550,6 +571,22 @@ void ConvDriver<Tgpu, Tref>::ValidateLayoutInputParameters(std::string layout_va
}
}

template <typename Tgpu, typename Tref>
void ConvDriver<Tgpu, Tref>::ValidateVectorizedParameters(int vector_dim, int vector_length)
{
    // Validate the (tensor_vect, vector_length) flag combination.
    // Accepted combinations:
    //   - vectorized C dimension (vector_dim == 1) with length 4 or 8
    //     (i.e. the NCHWc4/NCHWc8/CHWNc4/CHWNc8 layouts);
    //   - non-vectorized tensors (vector_dim == 0) with length 0 or 1.
    //     The 'vector_length' flag defaults to "1", so the default driver
    //     invocation (dim 0, length 1) must be accepted here.
    if(((vector_length == 4 || vector_length == 8) && vector_dim == 1) ||
       ((vector_length == 0 || vector_length == 1) && vector_dim == 0))
    {
        // do nothing, values match the combinations defined in the MIOpen lib.
    }
    else
    {
        // Separators added so the two values do not run together in the log.
        std::cerr << "Invalid Tensor Vectorization Parameter Value - "
                  << "vector_dim: " << vector_dim << ", vector_length: " << vector_length
                  << std::endl;
        exit(EXIT_FAILURE);
    }
}

template <typename Tgpu, typename Tref>
int ConvDriver<Tgpu, Tref>::ChkLayout_ShortName()
{
Expand Down Expand Up @@ -594,7 +631,16 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
SetConvDescriptorFromCmdLineArgs();

std::vector<int> out_len = GetOutputTensorLengths();

if(miopen::deref(inputTensor).GetLayout_t() == miopenTensorNCHWc4 ||
miopen::deref(inputTensor).GetLayout_t() == miopenTensorNCHWc8)
{
out_len[1] *= miopen::deref(inputTensor).GetVectorLength();
}
if(miopen::deref(inputTensor).GetLayout_t() == miopenTensorCHWNc4 ||
miopen::deref(inputTensor).GetLayout_t() == miopenTensorCHWNc8)
{
out_len[0] *= miopen::deref(inputTensor).GetVectorLength();
}
miopenDataType_t y_type =
(data_type == miopenInt8 || data_type == miopenInt8x4) ? miopenInt32 : data_type;
SetTensorNd(outputTensor, out_len, inflags.GetValueStr("out_layout"), y_type);
Expand Down Expand Up @@ -737,6 +783,8 @@ int ConvDriver<Tgpu, Tref>::AddCmdLineArgs()
"0",
"tensor vectorization type (none, vect_c, vect_n) (Default=0)",
"int");
inflags.AddInputFlag(
"vector_length", 'L', "1", "tensor vectorization length (Default=1)", "int");
inflags.AddInputFlag("dilation_d", '^', "1", "Dilation of Filter Depth (Default=1)", "int");
inflags.AddInputFlag("dilation_h", 'l', "1", "Dilation of Filter Height (Default=1)", "int");
inflags.AddInputFlag("dilation_w", 'j', "1", "Dilation of Filter Width (Default=1)", "int");
Expand Down Expand Up @@ -1203,17 +1251,13 @@ int ConvDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
}

if(is_fwd || is_wrw)
in = tensor<Tgpu>(miopen::deref(inputTensor).GetLengths(),
miopen::deref(inputTensor).GetStrides());
in = tensor<Tgpu>(miopen::deref(inputTensor));
if(is_fwd || is_bwd)
wei = tensor<Tgpu>(miopen::deref(weightTensor).GetLengths(),
miopen::deref(weightTensor).GetStrides());
wei = tensor<Tgpu>(miopen::deref(weightTensor));
if(is_fwd)
out = tensor<Tgpu>(miopen::deref(outputTensor).GetLengths(),
miopen::deref(outputTensor).GetStrides());
out = tensor<Tgpu>(miopen::deref(outputTensor));
if(is_bwd || is_wrw)
dout = tensor<Tgpu>(miopen::deref(outputTensor).GetLengths(),
miopen::deref(outputTensor).GetStrides());
dout = tensor<Tgpu>(miopen::deref(outputTensor));

if(is_bwd)
din = std::vector<Tgpu>(in_sz, static_cast<Tgpu>(0));
Expand All @@ -1229,12 +1273,9 @@ int ConvDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
new GPUMem(ctx, GetTensorSize(weightTensor_vect4), sizeof(Tgpu)));
}

outhost = tensor<Tref>(miopen::deref(outputTensor).GetLengths(),
miopen::deref(outputTensor).GetStrides());
din_host = tensor<Tref>(miopen::deref(inputTensor).GetLengths(),
miopen::deref(inputTensor).GetStrides());
dwei_host = tensor<Tref>(miopen::deref(weightTensor).GetLengths(),
miopen::deref(weightTensor).GetStrides());
outhost = tensor<Tref>(miopen::deref(outputTensor));
din_host = tensor<Tref>(miopen::deref(inputTensor));
dwei_host = tensor<Tref>(miopen::deref(weightTensor));

std::string inFileName = inflags.GetValueStr("in_data");
std::string weiFileName = inflags.GetValueStr("weights");
Expand Down Expand Up @@ -1341,11 +1382,9 @@ int ConvDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
size_t b_sz = GetTensorSize(biasTensor);
b_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, b_sz, sizeof(Tgpu)));
db_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, b_sz, sizeof(Tgpu)));
b = tensor<Tgpu>(miopen::deref(biasTensor).GetLengths(),
miopen::deref(biasTensor).GetStrides());
b = tensor<Tgpu>(miopen::deref(biasTensor));
db = std::vector<Tgpu>(b_sz, static_cast<Tgpu>(0));
db_host = tensor<Tref>(miopen::deref(biasTensor).GetLengths(),
miopen::deref(biasTensor).GetStrides());
db_host = tensor<Tref>(miopen::deref(biasTensor));
for(int i = 0; i < b_sz; i++)
{
b.data[i] = static_cast<Tgpu>(i % 8) +
Expand Down Expand Up @@ -2147,8 +2186,7 @@ int ConvDriver<Tgpu, Tref>::RunForwardGPUReference()
out_dev->FromGPU(GetStream(), outhost.data.data());
else
{
auto out_tmp = tensor<Tgpu>(miopen::deref(outputTensor).GetLengths(),
miopen::deref(outputTensor).GetStrides());
auto out_tmp = tensor<Tgpu>(miopen::deref(outputTensor));
out_dev->FromGPU(GetStream(), out_tmp.data.data());
for(int i = 0; i < out_tmp.data.size(); i++)
{
Expand Down Expand Up @@ -3064,8 +3102,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardWeightsGPUReference()
dwei_dev->FromGPU(GetStream(), dwei_host.data.data());
else
{
auto dwei_tmp = tensor<Tgpu>(miopen::deref(weightTensor).GetLengths(),
miopen::deref(weightTensor).GetStrides());
auto dwei_tmp = tensor<Tgpu>(miopen::deref(weightTensor));
dwei_dev->FromGPU(GetStream(), dwei_tmp.data.data());
for(int i = 0; i < dwei_tmp.data.size(); i++)
{
Expand Down Expand Up @@ -3113,8 +3150,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardDataGPUReference()
din_dev->FromGPU(GetStream(), din_host.data.data());
else
{
auto din_tmp = tensor<Tgpu>(miopen::deref(inputTensor).GetLengths(),
miopen::deref(inputTensor).GetStrides());
auto din_tmp = tensor<Tgpu>(miopen::deref(inputTensor).GetType());
din_dev->FromGPU(GetStream(), din_tmp.data.data());
for(int i = 0; i < din_tmp.data.size(); i++)
{
Expand Down
8 changes: 4 additions & 4 deletions driver/driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,10 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
[[gnu::noreturn]] inline void Usage()
{
printf("Usage: ./driver *base_arg* *other_args*\n");
printf(
"Supported Base Arguments: conv[fp16|int8|bfp16], CBAInfer[fp16], pool[fp16], lrn[fp16], "
"activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], "
"tensorop[fp16], reduce[fp16,fp64]\n");
printf("Supported Base Arguments: conv[fp16|fp16x4|fp16x8|int8|bfp16], CBAInfer[fp16], "
"pool[fp16], lrn[fp16], "
"activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], "
"tensorop[fp16], reduce[fp16,fp64]\n");
exit(0); // NOLINT (concurrency-mt-unsafe)
}

Expand Down
83 changes: 80 additions & 3 deletions driver/tensor_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,55 @@
#include <numeric>
#include <vector>

/// Map a driver layout string to the corresponding MIOpen vectorized layout enum.
/// Only the vectorized layouts (NCHWc4/NCHWc8/CHWNc4/CHWNc8) are accepted;
/// any other string throws.
inline miopenTensorLayout_t StringToLayoutType(const std::string& layout)
{
    if(layout == "NCHWc4")
        return miopenTensorNCHWc4;
    if(layout == "NCHWc8")
        return miopenTensorNCHWc8;
    if(layout == "CHWNc4")
        return miopenTensorCHWNc4;
    if(layout == "CHWNc8")
        return miopenTensorCHWNc8;
    MIOPEN_THROW("We only support NCHWc4, NCHWc8, CHWNc4, CHWNc8 vectorized tensor layout.");
    // Unreachable: MIOPEN_THROW raises. Kept only to silence missing-return warnings.
    return miopenTensorNCHW;
}

/// Permute 'lens' in place so that lens[i] becomes the element that was at
/// position indices[i], i.e. out[i] = lens[indices[i]]. Every index must be
/// a valid position in 'lens' (checked by assert in debug builds).
inline void LengthReorder(std::vector<int>& lens, const std::initializer_list<int>& indices)
{
    std::vector<int> out_lens;
    out_lens.reserve(indices.size());
    for(const int index : indices)
    {
        // Cast avoids a signed/unsigned comparison warning in the bounds check.
        assert(0 <= index && static_cast<std::size_t>(index) < lens.size());
        // Plain copy: std::move on an int is a no-op and just obscures intent.
        out_lens.push_back(lens[index]);
    }
    lens = std::move(out_lens);
}

/// Query the vectorization length of a tensor descriptor (e.g. 4 or 8 for the
/// NCHWc4/NCHWc8 layouts). Only 4-D descriptors may carry a vectorized layout;
/// any other rank throws.
/// NOTE(review): non-4D tensors reach this throw when called from GetTensorSize
/// — confirm that is the intended behavior for 1-D/5-D descriptors.
inline int GetTensorVectorLength(miopenTensorDescriptor_t& tensor)
{
    // Initialize defensively: if the C API fails without writing the output,
    // we must not return an indeterminate value.
    int vectorLength = 0;

    int size = 0;
    miopenGetTensorDescriptorSize(tensor, &size);

    if(size == 4)
    {
        miopenGetNdTensorDescriptorVectorLength(tensor, &vectorLength);
        return vectorLength;
    }

    MIOPEN_THROW("We only support 4D layout in vector format");
    // Unreachable: MIOPEN_THROW raises. Kept only to silence missing-return warnings.
    return 0;
}

inline std::vector<int> GetTensorLengths(miopenTensorDescriptor_t& tensor)
{
int n;
Expand Down Expand Up @@ -104,6 +153,27 @@ inline int SetTensor4d(miopenTensorDescriptor_t t,
return miopenSet4dTensorDescriptor(t, data_type, UNPACK_VEC4(len));
}

/// Set an N-D descriptor with one of the vectorized layouts.
/// 'len' arrives in NCHW order (MIOpen's implicit convention); for the CHWNc*
/// layouts it is rotated into CHWN order before the descriptor is written.
/// Any non-vectorized layout throws.
inline int SetTensorNdVector(miopenTensorDescriptor_t t,
                             std::vector<int>& len,
                             miopenTensorLayout_t layout,
                             miopenDataType_t data_type = miopenFloat)
{
    switch(layout)
    {
    case miopenTensorNCHWc4:
    case miopenTensorNCHWc8:
        // Lengths already arrive in NCHW order; nothing to reorder.
        break;
    case miopenTensorCHWNc4:
    case miopenTensorCHWNc8:
        // Rotate NCHW -> CHWN.
        LengthReorder(len, {1, 2, 3, 0});
        break;
    default:
        MIOPEN_THROW("We only support NCHWc4, NCHWc8, CHWNc4, CHWNc8 vectorized tensor layout.");
        return -1;
    }
    return miopenSetNdTensorDescriptorWithLayout(t, data_type, layout, len.data(), len.size());
}

inline int SetTensorNd(miopenTensorDescriptor_t t,
std::vector<int>& len,
miopenDataType_t data_type = miopenFloat)
Expand All @@ -129,11 +199,16 @@ inline int SetTensorNd(miopenTensorDescriptor_t t,
return SetTensorNd(t, len, data_type);
}

if(layout.size() != len.size())
if(layout.size() != len.size() && layout.find("c") == std::string::npos)
{
MIOPEN_THROW("unmatched layout and dimension size");
}

if(layout.find("c") != std::string::npos)
{
return SetTensorNdVector(t, len, StringToLayoutType(layout), data_type);
}

// Dimension lengths vector 'len' comes with a default layout.
std::string len_layout = miopen::tensor_layout_get_default(layout.size());
if(len_layout.empty())
Expand All @@ -155,8 +230,9 @@ inline size_t GetTensorSize(miopenTensorDescriptor_t& tensor)
{
assert(miopen::deref(tensor).IsPacked() &&
"GetTensorSize should not be used on an unpacked tensor.");
const auto len = GetTensorLengths(tensor);
size_t sz = std::accumulate(len.begin(), len.end(), size_t{1}, std::multiplies<size_t>());
const auto len = GetTensorLengths(tensor);
const auto vectorLength = GetTensorVectorLength(tensor);
size_t sz = std::accumulate(len.begin(), len.end(), vectorLength, std::multiplies<size_t>());

return sz;
}
Expand All @@ -168,4 +244,5 @@ inline size_t GetTensorSpace(miopenTensorDescriptor_t& tensor)
{
return miopen::deref(tensor).GetElementSpace();
}

#endif // GUARD_MIOPEN_TENSOR_DRIVER_HPP
34 changes: 34 additions & 0 deletions include/miopen/miopen.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,24 @@ typedef enum
miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */
} miopenDataType_t;

/*! @ingroup tensor
 * @enum miopenTensorLayout_t
 * Tensor layouts supported by MIOpen.
 * The miopenTensorCHWNc4 and miopenTensorCHWNc8 layouts are supported only for weight tensors.
 */
typedef enum
{
miopenTensorNCHW = 0, /*!< NCHW memory layout (Fully supported) */
miopenTensorNHWC = 1, /*!< NHWC memory layout (Fully supported) */
miopenTensorCHWN = 2, /*!< CHWN memory layout (Not supported) */
miopenTensorNCHWc4 = 3, /*!< NCHWc4 memory layout (Partially supported) */
miopenTensorNCHWc8 = 4, /*!< NCHWc8 memory layout (Partially supported) */
miopenTensorCHWNc4 = 5, /*!< CHWNc4 memory layout (Partially supported) */
miopenTensorCHWNc8 = 6, /*!< CHWNc8 memory layout (Partially supported) */
miopenTensorNCDHW = 7, /*!< NCDHW memory layout (Fully supported) */
miopenTensorNDHWC = 8, /*!< NDHWC memory layout (Fully supported) */
} miopenTensorLayout_t;

/*! @ingroup pooling
* @enum miopenIndexType_t
* MIOpen index datatypes.
Expand Down Expand Up @@ -580,6 +598,22 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor
MIOPEN_EXPORT miopenStatus_t miopenSet4dTensorDescriptor(
miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w);

/*! @brief Set shape of ND tensor with specific layout
*
* Interface for setting N-D tensor shape. This interface supports the NHWC, NCHW, NCHWc*, and CHWNc* layouts.
* @param tensorDesc Tensor descriptor type (output)
* @param dataType MIOpen datatype (input)
* @param tensorLayout Tensor layout (input)
* @param lens Tensor dimensions (input)
* @param num_lens Tensor dimension size (input)
* @return miopenStatus_t
*/
MIOPEN_EXPORT miopenStatus_t
miopenSetNdTensorDescriptorWithLayout(miopenTensorDescriptor_t tensorDesc,
miopenDataType_t dataType,
miopenTensorLayout_t tensorLayout,
int* lens,
int num_lens);
/*! @brief Set shape and stride of 4D tensor
*
* Interface for setting 4-D tensor shape and stride.
Expand Down
2 changes: 1 addition & 1 deletion src/conv/problem_description.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void ProblemDescription::HeuristicUpdateLayouts()
{
const std::string labels = tensor_layout_get_default(in_layout.size());

static const std::vector<std::string> supported_layouts = {"NCHW", "NHWC", "NCDHW"};
static const std::vector<std::string> supported_layouts = {"NCHW", "NHWC", "CHWN", "NCDHW"};
for(const std::string& layout : supported_layouts)
{
// Skip layouts that doesn't match dimension sizes
Expand Down
Loading

0 comments on commit 93d1476

Please sign in to comment.