diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a3345a808..aa3d8ce8c6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -115,6 +115,8 @@ set( MIOpen_Source execution_context.cpp reducetensor.cpp reducetensor_api.cpp + activ/problem_description.cpp + solver/activ/fwd_0.cpp include/miopen/buffer_info.hpp include/miopen/temp_file.hpp include/miopen/bfloat16.hpp diff --git a/src/activ/problem_description.cpp b/src/activ/problem_description.cpp new file mode 100644 index 0000000000..4a959032a9 --- /dev/null +++ b/src/activ/problem_description.cpp @@ -0,0 +1,75 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include <miopen/activ/problem_description.hpp> +#include <miopen/names.hpp> + +#include <sstream> + +namespace miopen { + +namespace activ { + +NetworkConfig ProblemDescription::MakeNetworkConfig() const +{ + // short cut for packed tensors and 2D tensors with stride != width + const auto& x_lens = xDesc.GetLengths(); + + const auto x_elem_sz = xDesc.GetElementSize(); + + const auto x_width2D = + ((x_lens.size() == 2) ? x_lens[1] : (x_lens.size() == 3) ? x_lens[2] : (x_lens.size() == 4) + ? x_lens[3] + : x_lens[4]); + + const auto height = + (x_lens.size() == 2) ? x_lens[0] : (x_lens.size() == 3) ? x_lens[1] : (x_lens.size() == 4) + ? x_lens[2] + : x_lens[3]; + + const auto packed = xDesc.IsPacked() && yDesc.IsPacked(); + + const auto read_len = (packed) ? x_elem_sz : x_width2D; + + const auto read_unit = (read_len % 4 == 0) ? 4 : (read_len % 2 == 0) ? 2 : 1; + const auto MAP_RD = read_len / read_unit; + + std::ostringstream ss; + + ss << "activ-"; + ss << ((packed) ? "11" : "10"); // + lite bit + ss << xDesc.GetType(); + ss << activDesc.GetMode(); + ss << read_unit; + ss << MAP_RD; + ss << height; + + return NetworkConfig{ss.str()}; +} + +} // namespace activ + +} // namespace miopen diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index 03abeff9cb..05204bf4c2 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -60,17 +60,15 @@ static boost::filesystem::path ComputeSysCachePath() static boost::filesystem::path ComputeUserCachePath() { #ifdef MIOPEN_CACHE_DIR - std::string cache_dir = MIOPEN_CACHE_DIR; - std::string version; - - version = std::to_string(MIOPEN_VERSION_MAJOR) + "." + std::to_string(MIOPEN_VERSION_MINOR) + - "." + std::to_string(MIOPEN_VERSION_PATCH) + "." + - MIOPEN_STRINGIZE(MIOPEN_VERSION_TWEAK); - auto p = boost::filesystem::path{miopen::ExpandUser(cache_dir)} / version; + const std::string cache_dir = MIOPEN_CACHE_DIR; + const std::string version = + std::to_string(MIOPEN_VERSION_MAJOR) + "." 
+ std::to_string(MIOPEN_VERSION_MINOR) + "." + + std::to_string(MIOPEN_VERSION_PATCH) + "." + MIOPEN_STRINGIZE(MIOPEN_VERSION_TWEAK); const char* const custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{}); - if(custom != nullptr && strlen(custom) > 0) - p = boost::filesystem::path{miopen::ExpandUser(custom)}; + const auto p = (custom != nullptr && strlen(custom) > 0) + ? boost::filesystem::path{miopen::ExpandUser(custom)} + : boost::filesystem::path{miopen::ExpandUser(cache_dir)} / version; if(!boost::filesystem::exists(p) && !MIOPEN_DISABLE_USERDB) boost::filesystem::create_directories(p); diff --git a/src/expanduser.cpp b/src/expanduser.cpp index 96732c3a13..1a0820fabe 100644 --- a/src/expanduser.cpp +++ b/src/expanduser.cpp @@ -7,7 +7,7 @@ MIOPEN_DECLARE_ENV_VAR(HOME) namespace miopen { -std::string ExpandUser(std::string p) +std::string ExpandUser(const std::string& p) { const char* home_dir = GetStringEnv(HOME{}); if(home_dir == nullptr || home_dir == std::string("/") || home_dir == std::string("")) diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index 5c2a05a555..f042336591 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -503,12 +503,11 @@ std::size_t Handle::GetGlobalMemorySize() const std::size_t Handle::GetMaxComputeUnits() const { + const std::size_t num_cu = Value(MIOPEN_DEVICE_CU{}); + if(num_cu > 0) + return num_cu; + int result; - const char* const num_cu = miopen::GetStringEnv(MIOPEN_DEVICE_CU{}); - if(num_cu != nullptr && strlen(num_cu) > 0) - { - return boost::lexical_cast(num_cu); - } auto status = hipDeviceGetAttribute(&result, hipDeviceAttributeMultiprocessorCount, this->impl->device); if(status != hipSuccess) diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 91cf2ae20a..ec8a3f15e8 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -58,8 +58,6 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION) 
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) -#define MIOPEN_WORKAROUND_SWDEV_225285 1 - #if MIOPEN_USE_COMGR #define MIOPEN_WORKAROUND_ROCM_COMPILER_SUPPORT_ISSUE_27 1 #endif @@ -164,19 +162,12 @@ static hipModulePtr CreateModule(const boost::filesystem::path& hsaco_file) template /// intended for std::string and std::vector hipModulePtr CreateModuleInMem(const T& blob) { -#if !MIOPEN_WORKAROUND_SWDEV_225285 hipModule_t raw_m; auto status = hipModuleLoadData(&raw_m, reinterpret_cast(blob.data())); hipModulePtr m{raw_m}; if(status != hipSuccess) MIOPEN_THROW_HIP_STATUS(status, "Failed loading module"); return m; -#else - TmpDir tmp_dir("miopen"); - auto file_path = tmp_dir.path / boost::filesystem::unique_path("miopen-%%%%-%%%%-%%%%-%%%%"); - WriteFile(blob, file_path); - return CreateModule(file_path); -#endif } HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, @@ -187,16 +178,12 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, } HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, const std::string& blob) - : program(program_name) + : program(program_name) ///, module(CreateModuleInMem(blob)) { - TmpDir tmp_dir("miopen"); - auto file_path = tmp_dir.path / boost::filesystem::unique_path("miopen-%%%%-%%%%-%%%%-%%%%"); - WriteFile(blob, file_path); - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch == nullptr) - { - this->module = CreateModule(file_path); - } + if(nullptr != + miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{})) /// \todo Finish off this spaghetti eventually. 
+ return; + module = CreateModuleInMem(blob); } HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, diff --git a/src/include/miopen/activ/invoke_params.hpp b/src/include/miopen/activ/invoke_params.hpp new file mode 100644 index 0000000000..139bb3a8d2 --- /dev/null +++ b/src/include/miopen/activ/invoke_params.hpp @@ -0,0 +1,52 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include +#include + +namespace miopen { +namespace activ { + +struct InvokeParams : public miopen::InvokeParams +{ + InvokeParams() = default; + + double alpha = 0; + TensorDescriptor x_desc; + ConstData_t x = nullptr; + double beta = 0; + TensorDescriptor y_desc; + Data_t y = nullptr; + double gamma = 0; + size_t x_offset = 0; + size_t y_offset = 0; +}; + +} // namespace activ + +} // namespace miopen diff --git a/src/include/miopen/activ/problem_description.hpp b/src/include/miopen/activ/problem_description.hpp new file mode 100644 index 0000000000..de166ca0aa --- /dev/null +++ b/src/include/miopen/activ/problem_description.hpp @@ -0,0 +1,80 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include +#include + +#include + +namespace miopen { + +struct NetworkConfig; + +namespace activ { + +enum class Direction +{ + Forward, + Backward, +}; + +struct ProblemDescription +{ + ProblemDescription(Direction direction_, + const ActivationDescriptor& activ, + const TensorDescriptor& xDesc_, + const TensorDescriptor& yDesc_) + : direction(direction_), activDesc(activ), xDesc(xDesc_), yDesc(yDesc_) + { + } + + Direction GetDirection() const { return direction; } + const ActivationDescriptor& GetActivDesc() const { return activDesc; } + const TensorDescriptor& GetXDesc() const { return xDesc; } + const TensorDescriptor& GetYDesc() const { return yDesc; } + + NetworkConfig MakeNetworkConfig() const; + + void Serialize(std::ostream& stream) const; + + friend std::ostream& operator<<(std::ostream& os, const ProblemDescription& obj) + { + obj.Serialize(os); + return os; + } + + private: + Direction direction; + ActivationDescriptor activDesc; + TensorDescriptor xDesc; + TensorDescriptor yDesc; +}; + +} // namespace activ + +} // namespace miopen diff --git a/src/include/miopen/activ/solvers.hpp b/src/include/miopen/activ/solvers.hpp new file mode 100644 index 0000000000..cc7082a631 --- /dev/null +++ b/src/include/miopen/activ/solvers.hpp @@ -0,0 +1,53 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#pragma once + +#include <miopen/solver.hpp> + +namespace miopen { + +namespace activ { +struct ProblemDescription; +} // namespace activ + +namespace solver { + +namespace activ { + +struct ActivFwdSolver0 : public SolverBase +{ + bool IsApplicable(const ExecutionContext& context, + const miopen::activ::ProblemDescription& problem) const; + ConvSolution GetSolution(const ExecutionContext& context, + const miopen::activ::ProblemDescription& problem) const; +}; + +} // namespace activ + +} // namespace solver + +} // namespace miopen diff --git a/src/include/miopen/execution_context.hpp b/src/include/miopen/execution_context.hpp index 85e610e0f9..59dd4f9fc0 100644 --- a/src/include/miopen/execution_context.hpp +++ b/src/include/miopen/execution_context.hpp @@ -84,6 +84,7 @@ struct ExecutionContext inline void SetStream(Handle* stream_) { stream = stream_; } ExecutionContext() = default; + ExecutionContext(Handle* stream_) : stream(stream_) {} void DetectRocm(); diff --git a/src/include/miopen/expanduser.hpp b/src/include/miopen/expanduser.hpp index f97d7be796..b6e49b3c50 100644 --- a/src/include/miopen/expanduser.hpp +++ b/src/include/miopen/expanduser.hpp @@ -30,7 +30,7 @@ namespace miopen { -std::string ExpandUser(std::string p); +std::string ExpandUser(const std::string& p); } // namespace miopen diff --git a/src/include/miopen/find_solution.hpp b/src/include/miopen/find_solution.hpp index 99534f95e7..c7908ca63f 100644 --- a/src/include/miopen/find_solution.hpp +++ b/src/include/miopen/find_solution.hpp @@ -182,6 +182,55 @@ struct SolverContainer + // Search for all applicable solutions among many solvers + template <class Problem> + std::vector<ConvSolution> + SearchForSolutions(const ExecutionContext& ctx, + const Problem& problem, + std::size_t limit = std::numeric_limits<std::size_t>::max()) const + { + std::vector<ConvSolution> ss; + std::size_t count = 0; + const auto find_only = GetEnvFindOnlySolver(); + miopen::each_args( + 
[&](auto solver) { + if(count >= limit) + return; + if(find_only.IsValid() && find_only != Id{SolverDbId(solver)}) + { // Do nothing (and keep silence for the sake of Tuna), just skip. + } + // For better performance, check IsDynamic() first, because + // it is much faster than IsApplicable(). + // else if(problem.use_dynamic_solutions_only && !solver.IsDynamic()) + // MIOPEN_LOG_I2(SolverDbId(solver) << ": Skipped (non-dynamic)"); + else if(!solver.IsApplicable(ctx, problem)) + MIOPEN_LOG_I2(SolverDbId(solver) << ": Not applicable"); + else + { + auto s = solver.GetSolution(ctx, problem); + s.solver_id = SolverDbId(solver); + if(s.Succeeded()) + { + ++count; + ss.push_back(s); + MIOPEN_LOG_I2(SolverDbId(solver) << ": Success."); + } + else + { + /// \todo If Solver is applicable it must provide an appropriate Solution. + /// This is not the case for some 20x5 convolutions (and possibly others). + /// Normally we should not get here and message level should be Error. + /// For now, let's use Info (not Warning) level to avoid + /// flooding the console. 
+ MIOPEN_LOG_I(SolverDbId(solver) + << ": [Warning] Applicable Solver not succeeded."); + } + } + }, + Solvers{}...); + return ss; + } + template std::vector> GetWorkspaceSize(const Context& search_params, diff --git a/src/include/miopen/handle.hpp b/src/include/miopen/handle.hpp index 51f947be0b..58bd3eaf6d 100644 --- a/src/include/miopen/handle.hpp +++ b/src/include/miopen/handle.hpp @@ -224,11 +224,11 @@ struct Handle : miopenHandle void RegisterInvoker(const Invoker& invoker, const NetworkConfig& config, - solver::Id solver, + const std::string& solver, const AlgorithmName& algo) { - invokers.Register({config, solver.ToString()}, invoker); - invokers.SetAsFound1_0(config, algo, solver.ToString()); + invokers.Register({config, solver}, invoker); + invokers.SetAsFound1_0(config, algo, solver); } boost::optional diff --git a/src/include/miopen/stringutils.hpp b/src/include/miopen/stringutils.hpp index 3521fff99a..548aeb311f 100644 --- a/src/include/miopen/stringutils.hpp +++ b/src/include/miopen/stringutils.hpp @@ -41,9 +41,10 @@ namespace miopen { inline std::string -ReplaceString(std::string subject, const std::string& search, const std::string& replace) +ReplaceString(const std::string& in, const std::string& search, const std::string& replace) { size_t pos = 0; + std::string subject(in); while((pos = subject.find(search, pos)) != std::string::npos) { subject.replace(pos, search.length(), replace); diff --git a/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp b/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp index 4ed6bb6fd2..8772270693 100644 --- a/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp +++ b/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp @@ -50,10 +50,12 @@ struct binop_with_nan_check(accuVal), currVal, changed); @@ -75,7 +77,7 @@ struct binop_with_nan_check // The method is called when the opReduce is indexable and the user 
asked for indices __device__ static inline void - calculate(compType& accuVal, compType currVal, int& accuIndex, int currIndex) + calculate(compType& accuVal, compType currVal, VOLATILE_WA_274384 int& accuIndex, int currIndex) { if(isnan(currVal)) { @@ -84,7 +86,7 @@ struct binop_with_nan_check } else { - bool changed = false; + VOLATILE_WA_274384 bool changed = false; opReduce{}(accuVal, currVal, changed); @@ -527,9 +529,9 @@ struct BlockwiseReduction_2d_block_buffer compType& accuData, int& accuIndex) { - const index_t thread_local_id = get_thread_local_1d_id(); - compType lAccuData = opReduce::GetZeroVal(); - int lAccuIndex = 0; + const index_t thread_local_id = get_thread_local_1d_id(); + compType lAccuData = opReduce::GetZeroVal(); + VOLATILE_WA_274384 int lAccuIndex = 0; static_if{}([&](auto) { for(index_t otherDimInd = 0; otherDimInd < toReduceBlocks; otherDimInd++) diff --git a/src/kernels/composable_kernel/include/utility/reduction_common.hpp b/src/kernels/composable_kernel/include/utility/reduction_common.hpp index fc389efcb2..e1c5c2f2df 100644 --- a/src/kernels/composable_kernel/include/utility/reduction_common.hpp +++ b/src/kernels/composable_kernel/include/utility/reduction_common.hpp @@ -28,6 +28,14 @@ #include "float_type.hpp" +#define WORKAROUND_SWDEV_274384 (HIP_PACKAGE_VERSION_FLAT >= 4002021203ULL) + +#if WORKAROUND_SWDEV_274384 +#define VOLATILE_WA_274384 volatile +#else +#define VOLATILE_WA_274384 +#endif + // this enumerate should be synchronized with include/miopen/reduce_common.hpp namespace ck { enum class ReductionMethod_t diff --git a/src/kernels/composable_kernel/include/utility/reduction_operator.hpp b/src/kernels/composable_kernel/include/utility/reduction_operator.hpp index 3019cbd9e4..a9c041895d 100644 --- a/src/kernels/composable_kernel/include/utility/reduction_operator.hpp +++ b/src/kernels/composable_kernel/include/utility/reduction_operator.hpp @@ -93,15 +93,13 @@ struct Max a = b; } - __device__ inline constexpr void 
operator()(T& a, T b, bool& changed) const + __device__ inline constexpr void operator()(T& a, T b, VOLATILE_WA_274384 bool& changed) const { if(a < b) { a = b; changed = true; } - else - changed = false; } static constexpr bool indexable = true; @@ -120,15 +118,13 @@ struct Min a = b; } - __device__ inline constexpr void operator()(T& a, T b, bool& changed) const + __device__ inline constexpr void operator()(T& a, T b, VOLATILE_WA_274384 bool& changed) const { if(a > b) { a = b; changed = true; } - else - changed = false; } static constexpr bool indexable = true; diff --git a/src/ocl/activ_ocl.cpp b/src/ocl/activ_ocl.cpp index 08f70281e9..0d317a3f63 100644 --- a/src/ocl/activ_ocl.cpp +++ b/src/ocl/activ_ocl.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2017 Advanced Micro Devices, Inc. + * Copyright (c) 2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -28,6 +28,10 @@ #include #include #include +#include +#include +#include +#include namespace miopen { @@ -46,6 +50,49 @@ miopenStatus_t ActivationDescriptor::Forward(Handle& handle, { MIOPEN_THROW("Only alpha=1 and beta=0 is supported"); } + + const auto problem = activ::ProblemDescription{activ::Direction::Forward, *this, xDesc, yDesc}; + + const auto invoke_params = [&]() { + auto tmp = activ::InvokeParams{}; + tmp.type = InvokeType::Run; + tmp.alpha = GetAlpha(); + tmp.beta = GetBeta(); + tmp.gamma = GetGamma(); + tmp.x = x; + tmp.x_desc = xDesc; + tmp.y = y; + tmp.y_desc = yDesc; + tmp.x_offset = xOffset; + tmp.y_offset = yOffset; + return tmp; + }(); + + const auto algo = AlgorithmName{"miopenActivationForward"}; + const auto network_config = problem.MakeNetworkConfig(); + + if(const auto invoker = handle.GetInvoker(network_config, boost::none, algo)) + { + (*invoker)(handle, invoke_params); + return miopenStatusSuccess; + } + + const auto ctx = 
ExecutionContext{&handle}; + const auto solvers = solver::SolverContainer{}; + const auto slns = solvers.SearchForSolutions(ctx, problem, 1); + + if(!slns.empty()) + { + const auto& sln = slns.front(); + if(!sln.invoker_factory) + MIOPEN_THROW("Invoker missing in solver " + sln.solver_id); + const auto invoker = handle.PrepareInvoker(*sln.invoker_factory, sln.construction_params); + handle.RegisterInvoker(invoker, network_config, sln.solver_id, algo); + invoker(handle, invoke_params); + return miopenStatusSuccess; + } + + // legacy part start miopenStatus_t status = miopenStatusSuccess; mlo_construct_neuron construct_params(conv::Direction::Forward); @@ -53,8 +100,6 @@ miopenStatus_t ActivationDescriptor::Forward(Handle& handle, double activ_beta = GetBeta(); double activ_gamma = GetGamma(); - std::string network_config{}; - // short cut for packed tensors and 2D tensors with stride != width auto x_lens = xDesc.GetLengths(); auto y_lens = yDesc.GetLengths(); @@ -62,325 +107,159 @@ miopenStatus_t ActivationDescriptor::Forward(Handle& handle, auto x_strides = xDesc.GetStrides(); auto y_strides = yDesc.GetStrides(); - auto x_elem_sz = xDesc.GetElementSize(); - auto y_elem_sz = yDesc.GetElementSize(); - - auto x_stride2D = static_cast( - (x_lens.size() == 2) ? x_strides[0] : (x_lens.size() == 3) - ? x_strides[1] - : (x_lens.size() == 4) ? x_strides[2] - : x_strides[3]); - auto y_stride2D = static_cast( - (y_lens.size() == 2) ? y_strides[0] : (y_lens.size() == 3) - ? y_strides[1] - : (y_lens.size() == 4) ? y_strides[2] - : y_strides[3]); - - auto x_width2D = - ((x_lens.size() == 2) ? x_lens[1] : (x_lens.size() == 3) ? x_lens[2] : (x_lens.size() == 4) - ? x_lens[3] - : x_lens[4]); - - auto y_width2D = - ((y_lens.size() == 2) ? y_lens[1] : (y_lens.size() == 3) ? y_lens[2] : (y_lens.size() == 4) - ? 
y_lens[3] - : y_lens[4]); - - bool t2D = (x_lens.size() == y_lens.size() && - ((x_width2D != x_stride2D) || (y_width2D != y_stride2D)) && - (x_lens.size() == 2 || (x_lens.size() == 3 && x_lens[0] == 1 && y_lens[0] == 1) || - (x_lens.size() == 4 && x_lens[0] == 1 && x_lens[1] == 1 && y_lens[0] == 1 && - y_lens[1] == 1) || - (x_lens.size() == 5 && x_lens[0] == 1 && x_lens[1] == 1 && x_lens[2] == 1 && - y_lens[0] == 1 && y_lens[1] == 1 && y_lens[2] == 1))); - bool packed = xDesc.IsPacked() && yDesc.IsPacked(); - visit_float(xDesc.GetType(), [&](auto as_float) { - - if(x_elem_sz == y_elem_sz && (packed || t2D)) + construct_params.setStream(&handle); + + int nOut = 1; + int cOut = 1; + int hOut = 1; + int wOut = 1; + int nOutStride = 0; + int cOutStride = 0; + int hOutStride = 0; + int wOutStride = 0; + + if(yDesc.GetSize() == 4) { - std::string compiler_options; - auto f_activ_alpha = as_float(activ_alpha); - auto f_activ_beta = as_float(activ_beta); - auto f_activ_gamma = as_float(activ_gamma); - - size_t height = (x_lens.size() == 2) ? x_lens[0] : (x_lens.size() == 3) - ? x_lens[1] - : (x_lens.size() == 4) - ? x_lens[2] - : x_lens[3]; - - size_t read_len = (packed) ? x_elem_sz : x_width2D; - - size_t read_unit = (read_len % 4 == 0) ? 4 : (read_len % 2 == 0) ? 2 : 1; - size_t MAP_RD = read_len / read_unit; - - const std::string READ_TYPE = - (read_unit == 1) ? "_FLOAT" : "_FLOAT" + std::to_string(read_unit); - - network_config = ((packed) ? 
"11" : "10") // + lite bit - + std::to_string(xDesc.GetType()) + std::to_string(mode) + - std::to_string(read_unit) + std::to_string(MAP_RD) + - std::to_string(height); - - auto&& kernels = handle.GetKernels("miopenActivationForward", network_config); - if(!kernels.empty()) - { - auto kernel = kernels.front(); - if(packed) - { - kernel(x, - y, - f_activ_gamma, - f_activ_beta, - f_activ_alpha, - static_cast(xOffset), - static_cast(yOffset)); - } - else - { - kernel(x, - y, - f_activ_gamma, - f_activ_beta, - f_activ_alpha, - static_cast(xOffset), - static_cast(yOffset), - x_stride2D, - y_stride2D); - } - } - else + std::tie(nOut, cOut, hOut, wOut) = tien<4>(yDesc.GetLengths()); + std::tie(nOutStride, cOutStride, hOutStride, wOutStride) = tien<4>(yDesc.GetStrides()); + } + else if(yDesc.GetSize() < 4 && yDesc.GetSize() > 0) + { + auto tensor_size = yDesc.GetSize(); + switch(tensor_size) { - std::string type_opt; - if(xDesc.GetType() == miopenFloat) - { - type_opt = " -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP32=1"; - } - else if(xDesc.GetType() == miopenHalf) - { - type_opt = " -DMIOPEN_USE_FP16=1 -DMIOPEN_USE_FP32=0"; - } - - compiler_options = " -DLITE -DMIOPEN_READ_UNIT=" + std::to_string(read_unit) + - " -DMIOPEN_READ_TYPE=" + READ_TYPE + " -DMIOPEN_NRN_OP_ID=" + - std::to_string(mode) + type_opt; - - std::vector vld; - std::vector vgd; - - vld.push_back(256); - vld.push_back(1); - vld.push_back(1); - - vgd.push_back(MAP_RD); - - std::string program_name = "MIOpenNeuron.cl"; - std::string kernel_name = - (packed) ? 
"MIOpenActiveFwdLite" : "MIOpenActiveFwd2DLite"; - if(packed) - { - vgd.push_back(1); - vgd.push_back(1); - - handle.AddKernel("miopenActivationForward", - network_config, - program_name, - kernel_name, - vld, - vgd, - compiler_options)(x, - y, - as_float(f_activ_gamma), - as_float(f_activ_beta), - as_float(f_activ_alpha), - static_cast(xOffset), - static_cast(yOffset)); - } - else - { - - vgd.push_back(height); - vgd.push_back(1); - - handle.AddKernel("miopenActivationForward", - network_config, - program_name, - kernel_name, - vld, - vgd, - compiler_options)(x, - y, - as_float(f_activ_gamma), - as_float(f_activ_beta), - as_float(f_activ_alpha), - static_cast(xOffset), - static_cast(yOffset), - x_stride2D, - y_stride2D); - } + case 1: + std::tie(wOut) = tien<1>(yDesc.GetLengths()); + std::tie(wOutStride) = tien<1>(yDesc.GetStrides()); + nOutStride = wOut * wOutStride; + cOutStride = wOut * wOutStride; + hOutStride = wOut * wOutStride; + break; + case 2: + std::tie(hOut, wOut) = tien<2>(yDesc.GetLengths()); + std::tie(hOutStride, wOutStride) = tien<2>(yDesc.GetStrides()); + nOutStride = hOut * hOutStride; + cOutStride = hOut * hOutStride; + break; + case 3: + std::tie(cOut, hOut, wOut) = tien<3>(yDesc.GetLengths()); + std::tie(cOutStride, hOutStride, wOutStride) = tien<3>(yDesc.GetStrides()); + nOutStride = cOut * cOutStride; + break; + default: assert(false); } } else { - construct_params.setStream(&handle); - - int nOut = 1; - int cOut = 1; - int hOut = 1; - int wOut = 1; - int nOutStride = 0; - int cOutStride = 0; - int hOutStride = 0; - int wOutStride = 0; - - if(yDesc.GetSize() == 4) - { - std::tie(nOut, cOut, hOut, wOut) = tien<4>(yDesc.GetLengths()); - std::tie(nOutStride, cOutStride, hOutStride, wOutStride) = - tien<4>(yDesc.GetStrides()); - } - else if(yDesc.GetSize() < 4 && yDesc.GetSize() > 0) - { - auto tensor_size = yDesc.GetSize(); - switch(tensor_size) - { - case 1: - std::tie(wOut) = tien<1>(yDesc.GetLengths()); - std::tie(wOutStride) = 
tien<1>(yDesc.GetStrides()); - nOutStride = wOut * wOutStride; - cOutStride = wOut * wOutStride; - hOutStride = wOut * wOutStride; - break; - case 2: - std::tie(hOut, wOut) = tien<2>(yDesc.GetLengths()); - std::tie(hOutStride, wOutStride) = tien<2>(yDesc.GetStrides()); - nOutStride = hOut * hOutStride; - cOutStride = hOut * hOutStride; - break; - case 3: - std::tie(cOut, hOut, wOut) = tien<3>(yDesc.GetLengths()); - std::tie(cOutStride, hOutStride, wOutStride) = tien<3>(yDesc.GetStrides()); - nOutStride = cOut * cOutStride; - break; - default: assert(false); - } - } - else - { - MIOPEN_THROW( - "activation does not support tensor size larger than 4 or smaller than 1"); - } - - construct_params.setTopDescFromMLDesc(yDesc); - int nIn = 1; - int cIn = 1; - int hIn = 1; - int wIn = 1; - int nInStride = 0; - int cInStride = 0; - int hInStride = 0; - int wInStride = 0; + MIOPEN_THROW("activation does not support tensor size larger than 4 or smaller than 1"); + } - if(xDesc.GetSize() == 4) - { - std::tie(nIn, cIn, hIn, wIn) = tien<4>(xDesc.GetLengths()); - std::tie(nInStride, cInStride, hInStride, wInStride) = tien<4>(xDesc.GetStrides()); - } - else if(xDesc.GetSize() < 4 && xDesc.GetSize() > 0) - { - auto tensor_size = xDesc.GetSize(); - switch(tensor_size) - { - case 1: - std::tie(wIn) = tien<1>(xDesc.GetLengths()); - std::tie(wInStride) = tien<1>(xDesc.GetStrides()); - nInStride = wIn * wInStride; - cInStride = wIn * wInStride; - hInStride = wIn * wInStride; - break; - case 2: - std::tie(hIn, wIn) = tien<2>(xDesc.GetLengths()); - std::tie(hInStride, wInStride) = tien<2>(xDesc.GetStrides()); - nInStride = hIn * hInStride; - cInStride = hIn * hInStride; - break; - case 3: - std::tie(cIn, hIn, wIn) = tien<3>(xDesc.GetLengths()); - std::tie(cInStride, hInStride, wInStride) = tien<3>(xDesc.GetStrides()); - nInStride = cIn * cInStride; - break; - default: assert(false); - } - } - else + construct_params.setTopDescFromMLDesc(yDesc); + int nIn = 1; + int cIn = 1; + int hIn = 1; 
+ int wIn = 1; + int nInStride = 0; + int cInStride = 0; + int hInStride = 0; + int wInStride = 0; + + if(xDesc.GetSize() == 4) + { + std::tie(nIn, cIn, hIn, wIn) = tien<4>(xDesc.GetLengths()); + std::tie(nInStride, cInStride, hInStride, wInStride) = tien<4>(xDesc.GetStrides()); + } + else if(xDesc.GetSize() < 4 && xDesc.GetSize() > 0) + { + auto tensor_size = xDesc.GetSize(); + switch(tensor_size) { - MIOPEN_THROW( - "Activation does not support tensor dimension larger than 4 or smaller than 1"); + case 1: + std::tie(wIn) = tien<1>(xDesc.GetLengths()); + std::tie(wInStride) = tien<1>(xDesc.GetStrides()); + nInStride = wIn * wInStride; + cInStride = wIn * wInStride; + hInStride = wIn * wInStride; + break; + case 2: + std::tie(hIn, wIn) = tien<2>(xDesc.GetLengths()); + std::tie(hInStride, wInStride) = tien<2>(xDesc.GetStrides()); + nInStride = hIn * hInStride; + cInStride = hIn * hInStride; + break; + case 3: + std::tie(cIn, hIn, wIn) = tien<3>(xDesc.GetLengths()); + std::tie(cInStride, hInStride, wInStride) = tien<3>(xDesc.GetStrides()); + nInStride = cIn * cInStride; + break; + default: assert(false); } - - construct_params.setBotDescFromMLDesc(xDesc); - - construct_params.setNeuronDescr( - static_cast(mode), activ_gamma, activ_beta, activ_alpha); - - mloConstruct(construct_params); - - std::string program_name = construct_params.getKernelFile(); // CL kernel filename - std::string kernel_name = construct_params.getKernelName(); // kernel name - std::string compiler_options = - construct_params.getCompilerOptions(); // kernel parameters - - const std::vector& vld = construct_params.getLocalWkSize(); - const std::vector& vgd = construct_params.getGlobalWkSize(); - - int imode = mode; - construct_params.getNeuronDescr(imode, activ_gamma, activ_beta, activ_alpha); - - auto f_activ_alpha = as_float(activ_alpha); - auto f_activ_beta = as_float(activ_beta); - auto f_activ_gamma = as_float(activ_gamma); - - compiler_options += - " -DMIOPEN_N_IN=" + std::to_string(nIn) + 
" -DMIOPEN_C_IN=" + std::to_string(cIn) + - " -DMIOPEN_H_IN=" + std::to_string(hIn) + " -DMIOPEN_W_IN=" + std::to_string(wIn) + - " -DMIOPEN_N_IN_STRIDE=" + std::to_string(nInStride) + " -DMIOPEN_C_IN_STRIDE=" + - std::to_string(cInStride) + " -DMIOPEN_H_IN_STRIDE=" + std::to_string(hInStride) + - " -DMIOPEN_W_IN_STRIDE=" + std::to_string(wInStride) + " -DMIOPEN_N_OUT=" + - std::to_string(nOut) + " -DMIOPEN_C_OUT=" + std::to_string(cOut) + - " -DMIOPEN_H_OUT=" + std::to_string(hOut) + " -DMIOPEN_W_OUT=" + - std::to_string(wOut) + " -DMIOPEN_N_OUT_STRIDE=" + std::to_string(nOutStride) + - " -DMIOPEN_C_OUT_STRIDE=" + std::to_string(cOutStride) + " -DMIOPEN_H_OUT_STRIDE=" + - std::to_string(hOutStride) + " -DMIOPEN_W_OUT_STRIDE=" + - std::to_string(wOutStride) + " -DMIOPEN_N_DIN=" + std::to_string(1) + - " -DMIOPEN_C_DIN=" + std::to_string(1) + " -DMIOPEN_H_DIN=" + std::to_string(1) + - " -DMIOPEN_W_DIN=" + std::to_string(1) + " -DMIOPEN_N_DIN_STRIDE=" + - std::to_string(1) + " -DMIOPEN_C_DIN_STRIDE=" + std::to_string(1) + - " -DMIOPEN_H_DIN_STRIDE=" + std::to_string(1) + " -DMIOPEN_W_DIN_STRIDE=" + - std::to_string(1) + " -DMIOPEN_N_DOUT=" + std::to_string(1) + " -DMIOPEN_C_DOUT=" + - std::to_string(1) + " -DMIOPEN_H_DOUT=" + std::to_string(1) + " -DMIOPEN_W_DOUT=" + - std::to_string(1) + " -DMIOPEN_N_DOUT_STRIDE=" + std::to_string(1) + - " -DMIOPEN_C_DOUT_STRIDE=" + std::to_string(1) + " -DMIOPEN_H_DOUT_STRIDE=" + - std::to_string(1) + " -DMIOPEN_W_DOUT_STRIDE=" + std::to_string(1) + - " -DMIOPEN_IN_BLOCK_SZ=" + std::to_string(cIn * hIn * wIn) + - " -DMIOPEN_OUT_BLOCK_SZ=" + std::to_string(cOut * hOut * wOut) + - " -DMIOPEN_DIN_BLOCK_SZ=" + std::to_string(1) + " -DMIOPEN_DOUT_BLOCK_SZ=" + - std::to_string(1); - - handle.AddKernel("miopenActivationForward", - network_config, - program_name, - kernel_name, - vld, - vgd, - compiler_options)(x, - y, - as_float(f_activ_gamma), - as_float(f_activ_beta), - as_float(f_activ_alpha), - static_cast(xOffset), - 
static_cast(yOffset)); } + else + { + MIOPEN_THROW( + "Activation does not support tensor dimension larger than 4 or smaller than 1"); + } + + construct_params.setBotDescFromMLDesc(xDesc); + + construct_params.setNeuronDescr( + static_cast(mode), activ_gamma, activ_beta, activ_alpha); + + mloConstruct(construct_params); + + std::string program_name = construct_params.getKernelFile(); // CL kernel filename + std::string kernel_name = construct_params.getKernelName(); // kernel name + std::string compiler_options = construct_params.getCompilerOptions(); // kernel parameters + + const std::vector& vld = construct_params.getLocalWkSize(); + const std::vector& vgd = construct_params.getGlobalWkSize(); + + int imode = mode; + construct_params.getNeuronDescr(imode, activ_gamma, activ_beta, activ_alpha); + + auto f_activ_alpha = as_float(activ_alpha); + auto f_activ_beta = as_float(activ_beta); + auto f_activ_gamma = as_float(activ_gamma); + + compiler_options += + " -DMIOPEN_N_IN=" + std::to_string(nIn) + " -DMIOPEN_C_IN=" + std::to_string(cIn) + + " -DMIOPEN_H_IN=" + std::to_string(hIn) + " -DMIOPEN_W_IN=" + std::to_string(wIn) + + " -DMIOPEN_N_IN_STRIDE=" + std::to_string(nInStride) + " -DMIOPEN_C_IN_STRIDE=" + + std::to_string(cInStride) + " -DMIOPEN_H_IN_STRIDE=" + std::to_string(hInStride) + + " -DMIOPEN_W_IN_STRIDE=" + std::to_string(wInStride) + " -DMIOPEN_N_OUT=" + + std::to_string(nOut) + " -DMIOPEN_C_OUT=" + std::to_string(cOut) + " -DMIOPEN_H_OUT=" + + std::to_string(hOut) + " -DMIOPEN_W_OUT=" + std::to_string(wOut) + + " -DMIOPEN_N_OUT_STRIDE=" + std::to_string(nOutStride) + " -DMIOPEN_C_OUT_STRIDE=" + + std::to_string(cOutStride) + " -DMIOPEN_H_OUT_STRIDE=" + std::to_string(hOutStride) + + " -DMIOPEN_W_OUT_STRIDE=" + std::to_string(wOutStride) + " -DMIOPEN_N_DIN=" + + std::to_string(1) + " -DMIOPEN_C_DIN=" + std::to_string(1) + " -DMIOPEN_H_DIN=" + + std::to_string(1) + " -DMIOPEN_W_DIN=" + std::to_string(1) + " -DMIOPEN_N_DIN_STRIDE=" + + std::to_string(1) + 
" -DMIOPEN_C_DIN_STRIDE=" + std::to_string(1) + + " -DMIOPEN_H_DIN_STRIDE=" + std::to_string(1) + " -DMIOPEN_W_DIN_STRIDE=" + + std::to_string(1) + " -DMIOPEN_N_DOUT=" + std::to_string(1) + " -DMIOPEN_C_DOUT=" + + std::to_string(1) + " -DMIOPEN_H_DOUT=" + std::to_string(1) + " -DMIOPEN_W_DOUT=" + + std::to_string(1) + " -DMIOPEN_N_DOUT_STRIDE=" + std::to_string(1) + + " -DMIOPEN_C_DOUT_STRIDE=" + std::to_string(1) + " -DMIOPEN_H_DOUT_STRIDE=" + + std::to_string(1) + " -DMIOPEN_W_DOUT_STRIDE=" + std::to_string(1) + + " -DMIOPEN_IN_BLOCK_SZ=" + std::to_string(cIn * hIn * wIn) + " -DMIOPEN_OUT_BLOCK_SZ=" + + std::to_string(cOut * hOut * wOut) + " -DMIOPEN_DIN_BLOCK_SZ=" + std::to_string(1) + + " -DMIOPEN_DOUT_BLOCK_SZ=" + std::to_string(1); + + handle.AddKernel("miopenActivationForward", + network_config, + program_name, + kernel_name, + vld, + vgd, + compiler_options)(x, + y, + as_float(f_activ_gamma), + as_float(f_activ_beta), + as_float(f_activ_alpha), + static_cast(xOffset), + static_cast(yOffset)); }); return (status); } diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index e157a5c1cd..5d339f981e 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -958,7 +958,8 @@ static Invoker PrepareInvoker(Handle& handle, const auto invoker = handle.PrepareInvoker(*solution.invoker_factory, solution.construction_params); - handle.RegisterInvoker(invoker, config, solver_id, AlgorithmName(solver_id.GetAlgo(dir))); + handle.RegisterInvoker( + invoker, config, solver_id.ToString(), AlgorithmName(solver_id.GetAlgo(dir))); return invoker; // NOLINT (performance-no-automatic-move) } diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index b3aba09049..6254d1d058 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -593,9 +593,9 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, std::to_string(compType) + "IN"; for(auto dimLen : inDescLengths) network_config += std::to_string(dimLen) + "_"; - network_config 
+= "OUT"; - for(auto dimLen : outDescLengths) - network_config += std::to_string(dimLen) + "_"; + network_config += "RED"; + for(auto dim : toReduceDims) + network_config += std::to_string(dim) + "_"; network_config += "BSIZE_" + std::to_string(blockSize); // kernel for the first call diff --git a/src/solver.cpp b/src/solver.cpp index f93e225589..d1e038ba05 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2017 Advanced Micro Devices, Inc. + * Copyright (c) 2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,8 +25,9 @@ *******************************************************************************/ #include -#include +#include +#include #include #include #include @@ -172,10 +173,7 @@ miopenConvAlgorithm_t Id::GetAlgo() const return it->second; } -inline bool Register(IdRegistryData& registry, - uint64_t value, - const std::string& str, - miopenConvAlgorithm_t algo) +inline bool Register(IdRegistryData& registry, uint64_t value, const std::string& str) { if(value == Id::invalid_value) { @@ -204,6 +202,16 @@ inline bool Register(IdRegistryData& registry, registry.value_to_str.emplace(value, str); registry.str_to_value.emplace(str, value); + return true; +} + +inline bool Register(IdRegistryData& registry, + uint64_t value, + const std::string& str, + miopenConvAlgorithm_t algo) +{ + if(!Register(registry, value, str)) + return false; registry.value_to_algo.emplace(value, algo); return true; } @@ -418,6 +426,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) RegisterWithSolver(registry, ++id, ConvMlirIgemmBwdXdlops{}, miopenConvolutionAlgoImplicitGEMM); RegisterWithSolver(registry, ++id, ConvMlirIgemmWrWXdlops{}, miopenConvolutionAlgoImplicitGEMM); + Register(registry, ++id, SolverDbId(activ::ActivFwdSolver0{})); + RegisterWithSolver(registry, ++id, 
ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC{}, diff --git a/src/solver/activ/fwd_0.cpp b/src/solver/activ/fwd_0.cpp new file mode 100644 index 0000000000..7b149d3e95 --- /dev/null +++ b/src/solver/activ/fwd_0.cpp @@ -0,0 +1,217 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include + +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace activ { + +bool ActivFwdSolver0::IsApplicable(const ExecutionContext&, + const miopen::activ::ProblemDescription& problem) const +{ + if(problem.GetDirection() != miopen::activ::Direction::Forward) + return false; + + // short cut for packed tensors and 2D tensors with stride != width + const auto& x_lens = problem.GetXDesc().GetLengths(); + const auto& y_lens = problem.GetYDesc().GetLengths(); + + const auto& x_strides = problem.GetXDesc().GetStrides(); + const auto& y_strides = problem.GetYDesc().GetStrides(); + + const auto x_elem_sz = problem.GetXDesc().GetElementSize(); + const auto y_elem_sz = problem.GetYDesc().GetElementSize(); + + const auto x_stride2D = static_cast( + (x_lens.size() == 2) ? x_strides[0] : (x_lens.size() == 3) + ? x_strides[1] + : (x_lens.size() == 4) ? x_strides[2] + : x_strides[3]); + const auto y_stride2D = static_cast( + (y_lens.size() == 2) ? y_strides[0] : (y_lens.size() == 3) + ? y_strides[1] + : (y_lens.size() == 4) ? y_strides[2] + : y_strides[3]); + + const auto x_width2D = + ((x_lens.size() == 2) ? x_lens[1] : (x_lens.size() == 3) ? x_lens[2] : (x_lens.size() == 4) + ? x_lens[3] + : x_lens[4]); + + const auto y_width2D = + ((y_lens.size() == 2) ? y_lens[1] : (y_lens.size() == 3) ? y_lens[2] : (y_lens.size() == 4) + ? 
y_lens[3] + : y_lens[4]); + + const auto t2D = + (x_lens.size() == y_lens.size() && + ((x_width2D != x_stride2D) || (y_width2D != y_stride2D)) && + (x_lens.size() == 2 || (x_lens.size() == 3 && x_lens[0] == 1 && y_lens[0] == 1) || + (x_lens.size() == 4 && x_lens[0] == 1 && x_lens[1] == 1 && y_lens[0] == 1 && + y_lens[1] == 1) || + (x_lens.size() == 5 && x_lens[0] == 1 && x_lens[1] == 1 && x_lens[2] == 1 && + y_lens[0] == 1 && y_lens[1] == 1 && y_lens[2] == 1))); + const auto packed = problem.GetXDesc().IsPacked() && problem.GetYDesc().IsPacked(); + + return x_elem_sz == y_elem_sz && (packed || t2D); +} + +ConvSolution ActivFwdSolver0::GetSolution(const ExecutionContext&, + const miopen::activ::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + // short cut for packed tensors and 2D tensors with stride != width + const auto x_lens = problem.GetXDesc().GetLengths(); + const auto y_lens = problem.GetYDesc().GetLengths(); + + const auto x_elem_sz = problem.GetXDesc().GetElementSize(); + + const auto x_width2D = + ((x_lens.size() == 2) ? x_lens[1] : (x_lens.size() == 3) ? x_lens[2] : (x_lens.size() == 4) + ? x_lens[3] + : x_lens[4]); + + const auto packed = problem.GetXDesc().IsPacked() && problem.GetYDesc().IsPacked(); + const auto read_len = (packed) ? x_elem_sz : x_width2D; + const auto read_unit = (read_len % 4 == 0) ? 4 : (read_len % 2 == 0) ? 2 : 1; + + const auto READ_TYPE = (read_unit == 1) ? "_FLOAT" : "_FLOAT" + std::to_string(read_unit); + + const auto height = + (x_lens.size() == 2) ? x_lens[0] : (x_lens.size() == 3) ? x_lens[1] : (x_lens.size() == 4) + ? 
x_lens[2] + : x_lens[3]; + + auto build_params = KernelBuildParameters{ + {"LITE"}, + {"MIOPEN_READ_UNIT", read_unit}, + {"MIOPEN_READ_TYPE", READ_TYPE}, + {"MIOPEN_NRN_OP_ID", problem.GetActivDesc().GetMode()}, + }; + + if(problem.GetXDesc().GetType() == miopenFloat) + { + build_params.Define("MIOPEN_USE_FP16", 0); + build_params.Define("MIOPEN_USE_FP32", 1); + } + else if(problem.GetXDesc().GetType() == miopenHalf) + { + build_params.Define("MIOPEN_USE_FP16", 1); + build_params.Define("MIOPEN_USE_FP32", 0); + } + + { + auto kernel_info = KernelInfo{}; + kernel_info.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + + kernel_info.l_wk.push_back(256); + kernel_info.l_wk.push_back(1); + kernel_info.l_wk.push_back(1); + + const auto MAP_RD = read_len / read_unit; + + kernel_info.g_wk.push_back(MAP_RD); + kernel_info.g_wk.push_back(packed ? 1 : height); + kernel_info.g_wk.push_back(1); + + kernel_info.kernel_file = "MIOpenNeuron.cl"; + kernel_info.kernel_name = (packed) ? "MIOpenActiveFwdLite" : "MIOpenActiveFwd2DLite"; + + result.construction_params.push_back(kernel_info); + } + + result.invoker_factory = [=](const std::vector& kernels) { + return [=](const Handle& handle, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(params.x_desc.GetType(), [&](auto as_float) { + const auto alpha = as_float(params.alpha); + const auto beta = as_float(params.beta); + const auto gamma = as_float(params.gamma); + + if(packed) + { + kernel(params.x, + params.y, + gamma, + beta, + alpha, + static_cast(params.x_offset), + static_cast(params.y_offset)); + } + else + { + const auto x_lens_ = params.x_desc.GetLengths(); + const auto y_lens_ = params.y_desc.GetLengths(); + + const auto x_strides = params.x_desc.GetStrides(); + const auto y_strides = params.y_desc.GetStrides(); + + const auto x_stride2D = static_cast( + (x_lens_.size() == 2) ? 
x_strides[0] : (x_lens_.size() == 3) + ? x_strides[1] + : (x_lens_.size() == 4) + ? x_strides[2] + : x_strides[3]); + const auto y_stride2D = static_cast( + (y_lens_.size() == 2) ? y_strides[0] : (y_lens_.size() == 3) + ? y_strides[1] + : (y_lens_.size() == 4) + ? y_strides[2] + : y_strides[3]); + + kernel(params.x, + params.y, + gamma, + beta, + alpha, + static_cast(params.x_offset), + static_cast(params.y_offset), + x_stride2D, + y_stride2D); + } + }); + }; + }; + + return result; +} + +} // namespace activ + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index 0354ecc9d2..9d4475d3fe 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ -208,7 +208,7 @@ static inline bool FindImplicitGemmWrwV4R1DynamicKernel(const ConvolutionContext grid_size = (GemmM / GemmMPerBlock) * (GemmN / (GemmNRepeat * GemmNPerThreadSubC * GemmN0YXPerBlock)) * GemmKGroups; - if((ho * wo) % 4 == 0) + if((ho * wo) % 16 == 0) kernel_name = "igemm_v4r1_dynamic_wrw_128x128x16_8x8_4x4x4x4x4x4_16x1x16x1_4x64"; else kernel_name = "igemm_v4r1_dynamic_wrw_128x128x16_8x8_4x4x4x4x4x4_16x1x16x1_16x16"; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bedc8f50df..64d09d40ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -934,6 +934,8 @@ COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose -- COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose --input 32 128 34 34 --weights 64 128 3 3 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-data COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose --input 128 128 35 35 --weights 128 128 3 3 --pads_strides_dilations 1 1 1 1 1 1 --disable-forward --disable-backward-data COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose --input 128 256 56 56 --weights 64 256 1 1 --pads_strides_dilations 0 0 1 1 1 1 
--disable-forward --disable-backward-data +COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose --input 64 512 28 28 --weights 256 512 1 1 --pads_strides_dilations 0 0 2 2 1 1 --disable-forward --disable-backward-data +COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS} $ --verbose --input 64 512 14 14 --weights 256 512 1 1 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-data COMMAND ${DYNAMIC_IMPLICITGEMM_BWD_ENVS} $ --verbose --input 64 64 56 56 --weights 256 64 1 1 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-weights COMMAND ${DYNAMIC_IMPLICITGEMM_BWD_ENVS} $ --verbose --input 32 128 34 34 --weights 64 128 3 3 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-weights COMMAND ${DYNAMIC_IMPLICITGEMM_BWD_ENVS} $ --verbose --input 128 128 35 35 --weights 128 128 3 3 --pads_strides_dilations 1 1 1 1 1 1 --disable-forward --disable-backward-weights @@ -987,6 +989,9 @@ COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIO COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 3 256 28 28 --weights 80 256 1 1 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-data COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 2 256 12 18 --weights 256 256 3 3 --pads_strides_dilations 1 1 1 1 1 1 --disable-forward --disable-backward-data COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 4 512 128 128 --weights 12 512 1 1 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-data +#regression test for issue 540 +COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 4 32 79 141 --weights 64 32 5 10 --pads_strides_dilations 0 0 2 2 1 1 --disable-forward --disable-backward-data + # WORKAROUND_ISSUE_996 # COMMAND ${DYNAMIC_IMPLICITGEMM_WRW_ENVS_XDLOPS} $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 400 256 7 7 --weights 
1024 256 7 7 --pads_strides_dilations 0 0 1 1 1 1 --disable-forward --disable-backward-data # WORKAROUND_ISSUE_996 @@ -1121,8 +1126,7 @@ if(MIOPEN_TEST_CONV) endif() if(MIOPEN_TEST_FLOAT) -# WORKAROUND_SWDEV_291479 -# add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED COMMAND $ --double --all --verbose) + add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED COMMAND $ --double --all --verbose) endif() # Add here regression tests that should be run only on Vega10/20 and only with FP16. diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 324e7933fd..9c3f678eda 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -820,11 +820,21 @@ struct reduce_driver : test_driver max_value = miopen_type{} == miopenHalf ? 13 : miopen_type{} == miopenInt8 ? 127 : 17; + // default data gneration (used by MIN/MAX) auto gen_value = [&](auto... is) { return (tensor_elem_gen_integer{max_value}(is...) * tensor_elem_gen_checkboard_sign{}(is...)); }; + // data generation used by ADD/AVG, data is distributed around 1.0 rather than 0.0, very low + // probability to get a reduced result of zero-value + auto gen_value_1 = [&](auto... is) { + auto rand_value = tensor_elem_gen_integer{max_value}(is...); + auto sign_value = tensor_elem_gen_checkboard_sign{}(is...); + + return (sign_value * rand_value + 1.0); + }; + // Special data generation for MUL, to avoid all-zero and large accumulative error in the // reduced result auto gen_value_2 = [&](auto... is) { @@ -835,12 +845,7 @@ struct reduce_driver : test_driver : (rand_value + max_value + 1) / (rand_value + max_value); }; - bool need_indices = - ((reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX || - reduceOp == MIOPEN_REDUCE_TENSOR_AMAX) && - indicesOpt != MIOPEN_REDUCE_TENSOR_NO_INDICES); - - // Special data generation for MIN/MAX/AMAX using a space of limitless number of values. 
+ // Special data generation for NORM1 and NORM2 using a space of limitless number of values. // This method is slower due to the use of rand(), it is usually used for manual testing auto gen_value_3 = [&](auto... is) { auto rand_upper = tensor_elem_gen_integer{max_value}(is...); @@ -850,6 +855,14 @@ struct reduce_driver : test_driver return rand_upper * sign_value * rand_ratio; }; + // Special data generation for AMAX, no zero value used + auto gen_value_4 = [&](auto... is) { + auto rand_value = tensor_elem_gen_integer{max_value}(is...); + auto sign_value = tensor_elem_gen_checkboard_sign{}(is...); + + return sign_value > 0.0 ? (rand_value + 0.5) : (-1.0 * rand_value - 0.5); + }; + // default tolerance (refer to driver.hpp) this->tolerance = 80; @@ -866,12 +879,27 @@ struct reduce_driver : test_driver if(std::is_same::value) this->tolerance *= this->tolerance * 10.0; - auto inputTensor = (reduceOp == MIOPEN_REDUCE_TENSOR_MUL) - ? tensor{this->inLengths}.generate(gen_value_2) - : (need_indices || reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || - reduceOp == MIOPEN_REDUCE_TENSOR_NORM2 - ? tensor{this->inLengths}.generate(gen_value_3) - : tensor{this->inLengths}.generate(gen_value)); + tensor inputTensor; + + switch(reduceOp) + { + case MIOPEN_REDUCE_TENSOR_ADD: + case MIOPEN_REDUCE_TENSOR_AVG: + inputTensor = tensor{this->inLengths}.generate(gen_value_1); + break; + case MIOPEN_REDUCE_TENSOR_MUL: + inputTensor = tensor{this->inLengths}.generate(gen_value_2); + break; + case MIOPEN_REDUCE_TENSOR_NORM1: + case MIOPEN_REDUCE_TENSOR_NORM2: + inputTensor = tensor{this->inLengths}.generate(gen_value_3); + break; + case MIOPEN_REDUCE_TENSOR_AMAX: + inputTensor = tensor{this->inLengths}.generate(gen_value_4); + break; + default: inputTensor = tensor{this->inLengths}.generate(gen_value); + }; + auto outputTensor = tensor{outLengths}; std::fill(outputTensor.begin(), outputTensor.end(), convert_type(0.0f));