Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into asm_igemm_nhwc_fwd…
Browse files Browse the repository at this point in the history
…_bwd
  • Loading branch information
carlushuang committed Jun 23, 2021
2 parents a6ffe3b + af4c67e commit 21bcc15
Show file tree
Hide file tree
Showing 21 changed files with 8,448 additions and 516 deletions.
15 changes: 6 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ RUN cget -p $PREFIX init --cxx /opt/rocm/llvm/bin/clang++ --std=c++14 -DAMDGPU_T
# Install dependencies
RUN cget -p $PREFIX install pfultz2/rocm-recipes
# Install a newer version of cmake for libMLIRMIOpen
RUN cget -p $PREFIX install kitware/[email protected]
RUN cget -p $PREFIX install kitware/[email protected]

ADD min-requirements.txt /min-requirements.txt
RUN CXXFLAGS='-isystem $PREFIX/include' cget -p $PREFIX install -f /min-requirements.txt
RUN cget -p $PREFIX install danmar/cppcheck@dd05839a7e63ef04afd34711cb3e1e0ef742882f
Expand All @@ -92,19 +93,15 @@ RUN pip install -r /doc-requirements.txt
RUN if [ "$USE_TARGETID" = "ON" ] ; then export HIPCC_LINK_FLAGS_APPEND='-O3 -parallel-jobs=4' && export HIPCC_COMPILE_FLAGS_APPEND='-O3 -Wno-format-nonliteral -parallel-jobs=4' && rm /usr/bin/hipcc; fi

# install last released miopentensile in default (master), install latest commits when MIOTENSILE_VER="latest" (develop)
RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@be26d30d3d7509a414134a45f4a6d49e5da250b8; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@4bfe00a8de61d12862d9fa803b8ea9a981a50f97; fi
RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@4fda8d57c6b088333b0392ba0617b0d6eec5d5b7; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@403fc13acb8518c3f82a79dc501b21ef1751e470; fi

RUN cd ~ && \
export MLIR_COMMIT=bbce2f3216e013efe59d7e9c021b4896f89176b0 && \
export MLIR_COMMIT=44abc4783fe2f6b4415871f7c44aa52ab89bccab && \
wget https://github.com/ROCmSoftwarePlatform/llvm-project-mlir/archive/$MLIR_COMMIT.tar.gz && \
tar -xvzf $MLIR_COMMIT.tar.gz && \
rm -rf $MLIR_COMMIT.tar.gz && \
cd llvm-project-mlir-$MLIR_COMMIT && mkdir -p build && cd build && \
$PREFIX/bin/cmake -G "Unix Makefiles" ../llvm \
-DLLVM_ENABLE_PROJECTS="mlir;lld" \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DLLVM_BUILD_LLVM_DYLIB=OFF \
-DLLVM_ENABLE_TERMINFO=OFF && \
$PREFIX/bin/cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_FAT_LIBMLIRMIOPEN=1 && \
make -j$(nproc) libMLIRMIOpen && \
$PREFIX/bin/cmake --install . --component libMLIRMIOpen --prefix /opt/rocm && \
cd ~ && rm -rf llvm-project-mlir-$MLIR_COMMIT
48 changes: 26 additions & 22 deletions Jenkinsfile

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9_stride_2_dil.inc
kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9_stride_2_dec.inc
kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9.inc
kernels/Conv_Winograd_v21_1_2_gfx9_f3x2_fp32_stride1_group.inc
kernels/Conv_Winograd_v21_1_2_gfx10_f3x2_fp32_stride1_group.inc
kernels/Conv_Winograd_v21_1_2_gfx9_fp16_dot2_edc_dilation2.inc
kernels/Conv_Winograd_v21_1_2_gfx9_fp16_dot2_edc_stride1.inc
kernels/Conv_Winograd_v21_1_2_gfx9_fp16_dot2_edc_stride2.inc
Expand Down Expand Up @@ -423,6 +425,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
kernels/conv_3x3_wheel_alpha_v3_0b.s
kernels/conv_3x3_wheel_alpha_v9_2_7.s
kernels/conv_3x3_wheel_alpha_v9_2_7_stride_2_dec.s
kernels/Conv_Winograd_v21_1_2_f3x2_fp32_stride1_group.s
kernels/Conv_Winograd_v21_1_2_fp16_dot2_edc_dilation2.s
kernels/Conv_Winograd_v21_1_2_fp16_dot2_edc_stride1.s
kernels/Conv_Winograd_v21_1_2_fp16_dot2_edc_stride2.s
Expand Down
36 changes: 23 additions & 13 deletions src/conv/invokers/mlir_impl_gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,27 +101,37 @@ void ComputeMlirDimsStrides(const conv::ProblemDescription& conv_problem,
{
auto group_count = conv_problem.GetGroupCount();

TensorDescriptor in;
if(conv_problem.GetDirection() == conv::Direction::Forward)
in = conv_problem.GetIn();
else
in = conv_problem.GetOut();

in_dims = in.GetLengths();
in_strides = in.GetStrides();
PermuteDimsStrides(in_dims, in_strides);
// Add a virtual group dimension before input channel.
const TensorDescriptor& in = conv_problem.GetIn();
in_dims = in.GetLengths();
in_strides = in.GetStrides();
InsertGToDimsStrides(in.GetLayout("NCHW"), 'C', group_count, in_dims, in_strides);
PermuteDimsStrides(in_dims, in_strides);

// Add a virtual group dimension before output channel.
const TensorDescriptor& weights = conv_problem.GetWeights();
weights_dims = weights.GetLengths();
weights_strides = weights.GetStrides();
PermuteDimsStrides(weights_dims, weights_strides);
InsertGToDimsStrides(
weights.GetLayout("NCHW"), 'N', group_count, weights_dims, weights_strides);
PermuteDimsStrides(weights_dims, weights_strides);

TensorDescriptor out;
if(conv_problem.GetDirection() == conv::Direction::Forward)
out = conv_problem.GetOut();
else
out = conv_problem.GetIn();

out_dims = out.GetLengths();
out_strides = out.GetStrides();
PermuteDimsStrides(out_dims, out_strides);
// Add a virtual group dimension before output channel.
const TensorDescriptor& out = conv_problem.GetOut();
out_dims = out.GetLengths();
out_strides = out.GetStrides();
InsertGToDimsStrides(out.GetLayout("NCHW"), 'C', group_count, out_dims, out_strides);
PermuteDimsStrides(out_dims, out_strides);
}

MlirConvArgs MakeMlirConvArgs(const std::vector<size_t>& in_dims,
Expand All @@ -138,11 +148,11 @@ MlirConvArgs MakeMlirConvArgs(const std::vector<size_t>& in_dims,
std::copy(strides.cbegin(), strides.cend(), &target.strides[0]);
};

StridedMemRef5D filter{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
StridedMemRef5D filter{nullptr, nullptr, 0, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}};
initDimStrides(weights_dims, weights_strides, filter);
StridedMemRef5D input{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
StridedMemRef5D input{nullptr, nullptr, 0, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}};
initDimStrides(in_dims, in_strides, input);
StridedMemRef5D output{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
StridedMemRef5D output{nullptr, nullptr, 0, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}};
initDimStrides(out_dims, out_strides, output);

return {filter, input, output};
Expand Down Expand Up @@ -255,7 +265,7 @@ InvokerFactory MakeMlirBwdInvokerFactory(const ConvolutionContext& ctx)
elapsed += handle.GetKernelTime();
}

SetMlirConvArgsPtr(tensors.in, tensors.out, tensors.w, args);
SetMlirConvArgsPtr(tensors.out, tensors.in, tensors.w, args);
for(const auto& k : kernels)
{
handle.Run(k)(args);
Expand Down
38 changes: 37 additions & 1 deletion src/include/miopen/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1451,11 +1451,47 @@ struct ConvBinWinogradRxS : SolverBase<ConvolutionContext>
ConvSolution GetSolution(const ConvolutionContext& params) const;
};

struct PerformanceConfigConvBinWinogradRxSf3x2
: Serializable<PerformanceConfigConvBinWinogradRxSf3x2>
{
int n_groups;
PerformanceConfigConvBinWinogradRxSf3x2(int n_groups_);
PerformanceConfigConvBinWinogradRxSf3x2() : PerformanceConfigConvBinWinogradRxSf3x2(-1) {}
PerformanceConfigConvBinWinogradRxSf3x2(bool) : PerformanceConfigConvBinWinogradRxSf3x2(1) {}

template <class Self, class F>
static void Visit(Self&& self, F f)
{
f(self.n_groups, "n_groups");
}
int GetNGroups() const { return n_groups; }

void HeuristicInit(const ConvolutionContext& config);
bool IsValidValue() const;
bool SetNextValue();
bool IsValid(const ConvolutionContext& config) const;
bool operator==(const PerformanceConfigConvBinWinogradRxSf3x2& other) const;
std::string ToString() const;
};

struct ConvBinWinogradRxSf3x2 : SolverBase<ConvolutionContext>
{
PerformanceConfigConvBinWinogradRxSf3x2 GetPerformanceConfig(const ConvolutionContext&) const;
bool IsValidPerformanceConfig(const ConvolutionContext&,
const PerformanceConfigConvBinWinogradRxSf3x2&) const;
PerformanceConfigConvBinWinogradRxSf3x2 Search(const ConvolutionContext&,
const AnyInvokeParams& invoke_ctx) const;

bool IsApplicable(const ConvolutionContext& params) const;
bool IsDynamic() const { return true; }
ConvSolution GetSolution(const ConvolutionContext& params) const;
ConvSolution GetSolution(const ConvolutionContext& params,
const PerformanceConfigConvBinWinogradRxSf3x2& config,
bool disableConfigOverrideFromEnv = false) const;
static size_t GetNGroups(const size_t group_conv, const size_t grid_group_size)
{
assert(group_conv != 0);
return grid_group_size / group_conv;
}
};

struct PerformanceConfigConvBinWinogradRxSf2x3
Expand Down
40 changes: 40 additions & 0 deletions src/kernels/Conv_Winograd_v21_1_2_f3x2_fp32_stride1_group.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/
.include "Conv_Winograd_v21_1_2_metadata.inc"

KERNEL_PROLOG f3x2_fp32_stride1_group

.if (.amdgcn.gfx_generation_number == 9)
.if (.amdgcn.gfx_generation_stepping == 10)
.error "gfx90a is not supported yet"
.else
.include "Conv_Winograd_v21_1_2_gfx9_f3x2_fp32_stride1_group.inc"
.endif
.elseif (.amdgcn.gfx_generation_number == 10)
.include "Conv_Winograd_v21_1_2_gfx10_f3x2_fp32_stride1_group.inc"
.endif

KERNEL_EPILOG f3x2_fp32_stride1_group
Loading

0 comments on commit 21bcc15

Please sign in to comment.