Skip to content

Commit

Permalink
Merge branch 'develop' into impl_fold_unfold
Browse files Browse the repository at this point in the history
  • Loading branch information
long10024070 authored Nov 18, 2024
2 parents 4868562 + 8e6fff2 commit 1a1a754
Show file tree
Hide file tree
Showing 41 changed files with 1,237 additions and 3,241 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
* @JehandadKhan @junliume
* @BrianHarrisonAMD @junliume @BradPepersAMD
# Documentation files
docs/ @ROCm/rocm-documentation
*.md @ROCm/rocm-documentation
Expand Down
23 changes: 3 additions & 20 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@ def cmake_build(Map conf=[:]){
def build_envs = "CTEST_PARALLEL_LEVEL=4 " + conf.get("build_env","")
def prefixpath = conf.get("prefixpath","/opt/rocm")
def build_type_debug = (conf.get("build_type",'release') == 'debug')
def code_conv_enabled = conf.get("codecov", false)

def mlir_args = " -DMIOPEN_USE_MLIR=" + conf.get("mlir_build", "ON")
// WORKAROUND_ISSUE_3192 Disabling MLIR for debug builds since MLIR generates sanitizer errors.
if (build_type_debug || code_conv_enabled)
if (build_type_debug)
{
mlir_args = " -DMIOPEN_USE_MLIR=OFF"
}
Expand Down Expand Up @@ -79,9 +78,7 @@ def cmake_build(Map conf=[:]){
test_flags = " --disable-verification-cache " + test_flags
}

if(code_conv_enabled){ //Need
setup_args = " -DCMAKE_BUILD_TYPE=debug -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags} -fprofile-arcs -ftest-coverage' -DCODECOV_TEST=On " + setup_args
}else if(build_type_debug){
if(build_type_debug){
setup_args = " -DCMAKE_BUILD_TYPE=debug -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'" + setup_args
}else{
setup_args = " -DCMAKE_BUILD_TYPE=release" + setup_args
Expand Down Expand Up @@ -247,7 +244,6 @@ def buildHipClangJob(Map conf=[:]){
show_node_info()
miopenCheckout()
env.HSA_ENABLE_SDMA=0
env.CODECOV_TOKEN="aec031be-7673-43b5-9840-d8fb71a2354e"
env.DOCKER_BUILDKIT=1
def image
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
Expand All @@ -257,7 +253,6 @@ def buildHipClangJob(Map conf=[:]){

def variant = env.STAGE_NAME

def codecov = conf.get("codecov", false)
def needs_gpu = conf.get("needs_gpu", true)
def lfs_pull = conf.get("lfs_pull", false)

Expand Down Expand Up @@ -298,17 +293,6 @@ def buildHipClangJob(Map conf=[:]){
}

cmake_build(conf)

if (codecov) {
sh '''
cd build
lcov --directory . --capture --output-file $(pwd)/coverage.info
lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
lcov --list $(pwd)/coverage.info
curl -s https://codecov.io/bash | bash
echo "Uploaded"
'''
}
}
}
}
Expand Down Expand Up @@ -417,10 +401,9 @@ def CheckPerfDbValid(Map conf=[:]){
/// BuildType := { Release* | Debug | Install } [ BuildTypeModifier ]
/// * BuildTypeModifier := { NOCOMGR | Embedded | Static | Normal-Find | Fast-Find
/// NOCK | NOMLIR | Tensile | Tensile-Latest | Package | ... }
/// TestSet := { All | Smoke* | <Performance Dataset> | Build-only } [ Codecov ]
/// TestSet := { All | Smoke* | <Performance Dataset> | Build-only }
/// * "All" corresponds to "cmake -DMIOPEN_TEST_ALL=On".
/// * "Smoke" (-DMIOPEN_TEST_ALL=Off) is the default and usually not specified.
/// * "Codecov" is optional code coverage analysis.
/// * "Performance Dataset" is a performance test with a specified dataset.
/// Target := { gfx908 | gfx90a | Vega20 | Vega10 | Vega* | gfx1030 } [ Xnack+ ]
/// * "Vega" (gfx906 or gfx900) is the default and usually not specified.
Expand Down
34 changes: 17 additions & 17 deletions driver/bn_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,11 @@ int BatchNormDriver<TInput, Tref, TAcc, TScaleBias, TOut>::GetandSetData()
std::vector<int> in_len = GetInputTensorLengthsFromCmdLine();
SetBNParametersFromCmdLineArgs();

auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign<TInput>(1e-2, 100); };

in.AllocOnHost(tensor<TInput>{bn_layout, in_len});
in.InitHostData(in.GetTensor().desc.GetElementSize(), true, gen_value);
for(size_t i = 0; i < in.GetVector().size(); i++)
{
in.GetVector()[i] = prng::gen_canonical<TInput>();
}

auto derivedBnDesc = miopen::TensorDescriptor{};
miopen::DeriveBNTensorDescriptor(derivedBnDesc, in.GetTensor().desc, bn_mode);
Expand All @@ -208,21 +209,18 @@ int BatchNormDriver<TInput, Tref, TAcc, TScaleBias, TOut>::GetandSetData()
scale.AllocOnHost(tensor<TScaleBias>{bn_layout, derivedBnDesc.GetLengths()});
bias.AllocOnHost(tensor<TScaleBias>{bn_layout, derivedBnDesc.GetLengths()});

auto gen_value_scale_bias = [](auto...) {
return prng::gen_descreet_uniform_sign<TInput>(1e-2, 100);
};

scale.InitHostData(scale.GetTensor().desc.GetElementSize(), true, gen_value_scale_bias);
bias.InitHostData(bias.GetTensor().desc.GetElementSize(), true, gen_value_scale_bias);
for(int i = 0; i < scale.GetVector().size(); i++)
{
scale.GetVector()[i] = prng::gen_canonical<TInput>();
bias.GetVector()[i] = prng::gen_canonical<TInput>();
}
}
if(isFwdInfer)
{
estMean.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});
estVariance.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});

auto gen_value_emean = [](auto...) {
return prng::gen_descreet_uniform_sign<TAcc>(1e-2, 100);
};
auto gen_value_emean = [](auto...) { return prng::gen_descreet_unsigned<TAcc>(1e-2, 100); };
estMean.InitHostData(estMean.GetTensor().desc.GetElementSize(), true, gen_value_emean);
}
else if(isFwdTrain)
Expand All @@ -232,11 +230,11 @@ int BatchNormDriver<TInput, Tref, TAcc, TScaleBias, TOut>::GetandSetData()
runMean.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});
runVariance.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});

auto gen_var = [](auto...) {
return static_cast<TAcc>(1e-2 * (prng::gen_0_to_B(100) + 1));
};
runMean.InitHostData(runMean.GetTensor().desc.GetElementSize(), true, gen_var);
runVariance.InitHostData(runVariance.GetTensor().desc.GetElementSize(), true, gen_var);
for(int i = 0; i < runVariance.GetVector().size(); i++)
{
runMean.GetVector()[i] = prng::gen_canonical<TAcc>();
runVariance.GetVector()[i] = prng::gen_canonical<TAcc>();
}
}
else if(isBwd)
{
Expand All @@ -248,13 +246,15 @@ int BatchNormDriver<TInput, Tref, TAcc, TScaleBias, TOut>::GetandSetData()
auto gen_var_bwd = [](auto...) {
return static_cast<TOut>(1e-2 * (prng::gen_0_to_B(100) + 1));
};

dy.InitHostData(dy.GetTensor().desc.GetElementSize(), true, gen_var_bwd);

dScale.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});
dBias.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});
savedMean.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});
savedInvVar.AllocOnHost(tensor<TAcc>{bn_layout, derivedBnDesc.GetLengths()});

auto gen_value = [](auto...) { return prng::gen_descreet_unsigned<TScaleBias>(1e-2, 100); };
bnScale.InitHostData(bnScale.GetTensor().desc.GetElementSize(), true, gen_value);

auto gen_in_var = [](auto...) {
Expand Down
19 changes: 13 additions & 6 deletions src/batch_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,34 @@ void DeriveBNTensorDescriptor(TensorDescriptor& derivedBnDesc,

TensorDescriptor BuildReshaped4DTensorDescriptor(const miopen::TensorDescriptor& tDesc)
{
    // Collapse a 5D (NCDHW / NDHWC) descriptor into an equivalent 4D one by
    // merging the depth dimension into its neighbor, so 4D batch-norm kernels
    // can process 5D tensors unchanged.
    //
    // @param tDesc  5D tensor descriptor; layout must be NCDHW or NDHWC.
    // @return       4D descriptor with the same data type and total element count.
    //
    // NOTE(review): terminates the process on any other layout — presumably the
    // callers guarantee 5D input; confirm whether MIOPEN_THROW would be more
    // appropriate than exit() here.
    std::vector<size_t> dims(tDesc.GetLengths());

    auto dataType = tDesc.GetType();
    auto layout   = tDesc.GetLayout_t();
    if(layout == miopenTensorNCDHW)
    {
        layout = miopenTensorNCHW;

        // NxCxDxHxW -> NxCx(D*H)xW
        dims[2] *= dims[3];
        dims[3] = dims[4];
        dims.pop_back();
    }
    else if(layout == miopenTensorNDHWC)
    {
        layout = miopenTensorNHWC;

        // NxDxHxWxC -> Nx(D*H)xWxC
        dims[1] *= dims[2];
        dims[2] = dims[3];
        dims[3] = dims[4];
        dims.pop_back();
    }
    else
    {
        std::cout << "Cannot handle layout : " << layout << "\n";
        exit(EXIT_FAILURE); // NOLINT (concurrency-mt-unsafe)
    }

    return {dataType, layout, dims};
}
Expand Down
148 changes: 75 additions & 73 deletions src/batch_norm_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,31 +248,31 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle,
estMeanDesc,
estimatedVariance,
miopen::debug::BatchNormDirection_t::ForwardInference);

// In case of NxCxDxHxW
int size{0};
miopenGetTensorDescriptorSize(xDesc, &size);
// In case of NxCxDxHxW
auto ReshapeIfNeeded = [size](const auto desc) {
return (size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(desc))
: miopen::deref(desc);
};
return miopen::try_([&] {
miopen::BatchNormForwardInference(
miopen::deref(handle),
bn_mode,
alpha,
beta,
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(xDesc))
: miopen::deref(xDesc),
DataCast(x),
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(yDesc))
: miopen::deref(yDesc),
DataCast(y),
miopen::deref(scaleDesc),
miopen::deref(biasDesc),
miopen::deref(estMeanDesc),
miopen::deref(estVarianceDesc),
DataCast(bnScale),
DataCast(bnBias),
DataCast(estimatedMean),
DataCast(estimatedVariance),
epsilon);
miopen::BatchNormForwardInference(miopen::deref(handle),
bn_mode,
alpha,
beta,
ReshapeIfNeeded(xDesc),
DataCast(x),
ReshapeIfNeeded(yDesc),
DataCast(y),
ReshapeIfNeeded(scaleDesc),
ReshapeIfNeeded(biasDesc),
ReshapeIfNeeded(estMeanDesc),
ReshapeIfNeeded(estVarianceDesc),
DataCast(bnScale),
DataCast(bnBias),
DataCast(estimatedMean),
DataCast(estimatedVariance),
epsilon);
});
}

Expand Down Expand Up @@ -328,33 +328,35 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle,
resultSaveMean,
resultSaveInvVariance,
miopen::debug::BatchNormDirection_t::ForwardTraining);
// In case of NxCxDxHxW

int size{0};
miopenGetTensorDescriptorSize(xDesc, &size);
// In case of NxCxDxHxW
auto ReshapeIfNeeded = [size](const auto desc) {
return (size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(desc))
: miopen::deref(desc);
};
return miopen::try_([&] {
miopen::BatchNormForwardTraining(
miopen::deref(handle),
bn_mode,
alpha,
beta,
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(xDesc))
: miopen::deref(xDesc),
DataCast(x),
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(yDesc))
: miopen::deref(yDesc),
DataCast(y),
miopen::deref(scaleDesc),
miopen::deref(biasDesc),
miopen::deref(savedMeanDesc),
miopen::deref(savedVarianceDesc),
DataCast(bnScale),
DataCast(bnBias),
expAvgFactor,
DataCast(resultRunningMean),
DataCast(resultRunningVariance),
epsilon,
DataCast(resultSaveMean),
DataCast(resultSaveInvVariance));
miopen::BatchNormForwardTraining(miopen::deref(handle),
bn_mode,
alpha,
beta,
ReshapeIfNeeded(xDesc),
DataCast(x),
ReshapeIfNeeded(yDesc),
DataCast(y),
ReshapeIfNeeded(scaleDesc),
ReshapeIfNeeded(biasDesc),
ReshapeIfNeeded(savedMeanDesc),
ReshapeIfNeeded(savedVarianceDesc),
DataCast(bnScale),
DataCast(bnBias),
expAvgFactor,
DataCast(resultRunningMean),
DataCast(resultRunningVariance),
epsilon,
DataCast(resultSaveMean),
DataCast(resultSaveInvVariance));
});
}

Expand Down Expand Up @@ -411,35 +413,35 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle,
savedMean,
savedInvVariance,
miopen::debug::BatchNormDirection_t::Backward);
// In case of NxCxDxHxW
int size{0};
miopenGetTensorDescriptorSize(xDesc, &size);
// In case of NxCxDxHxW
auto ReshapeIfNeeded = [size](const auto desc) {
return (size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(desc))
: miopen::deref(desc);
};
return miopen::try_([&] {
miopen::BatchNormBackward(
miopen::deref(handle),
bn_mode,
alphaDataDiff,
betaDataDiff,
alphaParamDiff,
betaParamDiff,
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(xDesc))
: miopen::deref(xDesc),
DataCast(x),
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(dyDesc))
: miopen::deref(dyDesc),
DataCast(dy),
(size == 5) ? miopen::BuildReshaped4DTensorDescriptor(miopen::deref(dxDesc))
: miopen::deref(dxDesc),
DataCast(dx),
miopen::deref(scaleDesc),
miopen::deref(biasDesc),
miopen::deref(savedMeanDesc),
miopen::deref(savedVarianceDesc),
DataCast(bnScale),
DataCast(resultBnScaleDiff),
DataCast(resultBnBiasDiff),
epsilon,
DataCast(savedMean),
DataCast(savedInvVariance));
miopen::BatchNormBackward(miopen::deref(handle),
bn_mode,
alphaDataDiff,
betaDataDiff,
alphaParamDiff,
betaParamDiff,
ReshapeIfNeeded(xDesc),
DataCast(x),
ReshapeIfNeeded(dyDesc),
DataCast(dy),
ReshapeIfNeeded(dxDesc),
DataCast(dx),
ReshapeIfNeeded(scaleDesc),
ReshapeIfNeeded(biasDesc),
ReshapeIfNeeded(savedMeanDesc),
ReshapeIfNeeded(savedVarianceDesc),
DataCast(bnScale),
DataCast(resultBnScaleDiff),
DataCast(resultBnBiasDiff),
epsilon,
DataCast(savedMean),
DataCast(savedInvVariance));
});
}
Loading

0 comments on commit 1a1a754

Please sign in to comment.