Skip to content

Commit

Permalink
[Tuning] Limit Tuning by Time (#1997)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Evgenii Averin <[email protected]>
  • Loading branch information
JehandadKhan and averinevg authored Mar 14, 2023
1 parent f1196f8 commit b4e0a67
Show file tree
Hide file tree
Showing 7 changed files with 318 additions and 33 deletions.
24 changes: 21 additions & 3 deletions src/generic_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,39 @@
*
*******************************************************************************/

#include <miopen/env.hpp>
#include <miopen/generic_search.hpp>
#include <miopen/generic_search_controls.hpp>

#include <cstddef>
#include <limits>
#include <chrono>

namespace miopen {
namespace solver {

MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX)

/// Returns the maximum number of tuning iterations.
///
/// The value is taken from MIOPEN_DEBUG_TUNING_ITERATIONS_MAX; when the
/// variable does not supply one, the largest std::size_t is used as fallback.
std::size_t GetTuningIterationsMax()
{
    constexpr std::size_t no_limit = std::numeric_limits<std::size_t>::max();
    return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}, no_limit);
}

/// Returns the tuning time budget in milliseconds.
///
/// The budget is read from MIOPEN_TUNING_TIME_MS_MAX with a fallback of two
/// hours. The result lives in a function-local static, so the lookup is
/// performed once and every later call returns the same value.
std::chrono::milliseconds GetTuningTimeMax()
{
    using std::chrono::milliseconds;

    static const milliseconds budget = [] {
        const milliseconds two_hours =
            std::chrono::duration_cast<milliseconds>(std::chrono::hours{2});
        return milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{}, two_hours.count())};
    }();
    return budget;
}

/// Returns the number of threads to use for kernel compilation during tuning.
///
/// The value is read from MIOPEN_COMPILE_PARALLEL_LEVEL. The fallback is 1 when
/// built with COMGR, otherwise half of the reported hardware threads.
///
/// The result is never 0: GenericSearch spawns this many compile agents and
/// then waits on their queue, so a zero thread count would leave it waiting
/// for items that nobody produces.
std::size_t GetTuningThreadsMax()
{
#if MIOPEN_USE_COMGR
    const auto def_max = 1; // COMGR is not parallelizable
#else
    // hardware_concurrency() returns 0 when the value is not computable, and
    // 1 / 2 also truncates to 0, so fall back to a single thread in both cases.
    const auto hw_threads = std::thread::hardware_concurrency();
    const int def_max     = hw_threads >= 2 ? static_cast<int>(hw_threads / 2) : 1;
#endif
    const std::size_t requested = Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}, def_max);
    // An explicit 0 from the environment is treated as "one thread".
    return requested != 0 ? requested : 1;
}

} // namespace solver
} // namespace miopen
119 changes: 92 additions & 27 deletions src/include/miopen/generic_search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <miopen/logger.hpp>
#include <miopen/timer.hpp>
#include <miopen/type_traits.hpp>
#include <miopen/mt_queue.hpp>
#include <miopen/generic_search_controls.hpp>

#include <algorithm>
#include <vector>
Expand All @@ -45,12 +47,11 @@
#include <iterator>
#include <chrono>
#include <cassert>
#include <random>

namespace miopen {
namespace solver {

MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY)

/// This STL-like container together with corresponding iterator provide access
/// to a set of all available performance configs for the given problem config.
///
Expand Down Expand Up @@ -294,6 +295,50 @@ GetAllSolutions(const Solver s, const Context& context_, const Problem& problem)
}

std::size_t GetTuningIterationsMax();
std::chrono::milliseconds GetTuningTimeMax(); // returns the max allowed time in milliseconds
std::size_t GetTuningThreadsMax();

template <typename PerformanceConfig, typename Solver, typename Context, typename Problem>
void CompileAgent(size_t thread_index,
size_t total_threads,
const Solver& s,
const Context& context,
const Problem& problem,
std::vector<PerformanceConfig>& data,
ThreadSafeQueue<std::tuple<PerformanceConfig, ConvSolution, bool>>& comp_queue)
{
const auto start_time =
std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now());
const auto data_size = data.size();
const auto time_budget = GetTuningTimeMax();
const auto& profile_h = context.GetStream();
// start the counter
for(auto idx = thread_index; idx < data_size; idx += total_threads)
{
// Check if we are out of time
const auto current_time = std::chrono::time_point_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now());
if(current_time - start_time > time_budget)
{
MIOPEN_LOG_I2("Thread: " << thread_index << " Done, exhausted time budget");
auto tmp = std::make_tuple<PerformanceConfig, ConvSolution, bool>({}, {}, true);
comp_queue.push(std::move(tmp));
break;
}
auto& current_config = data.at(idx);
ConvSolution current_solution = s.GetSolution(context, problem, current_config);
for(const auto& kernel : current_solution.construction_params)
{
if(profile_h.HasProgram(kernel.kernel_file, kernel.comp_options))
continue;
std::ignore = profile_h.LoadProgram(kernel.kernel_file, kernel.comp_options, false, "");
}
auto tup = std::make_tuple<PerformanceConfig, ConvSolution, bool>(
std::move(current_config), std::move(current_solution), false);
comp_queue.push(std::move(tup));
}
MIOPEN_LOG_I2("Thread: " << thread_index << " Done, completed tuning");
}

template <class Solver, class Context, class Problem>
auto GenericSearch(const Solver s,
Expand Down Expand Up @@ -323,10 +368,16 @@ auto GenericSearch(const Solver s,
auto& profile_h = context.GetStream();
AutoEnableProfiling enableProfiling{profile_h};

auto all_configs = GetAllConfigs(s, context, problem);
const std::size_t n_runs_total =
std::min(static_cast<std::size_t>(std::distance(all_configs.begin(), all_configs.end())),
GetTuningIterationsMax());
auto tmp_all_configs = GetAllConfigs(s, context, problem);
// For random access
std::vector<PerformanceConfig> all_configs;
std::copy(tmp_all_configs.begin(), tmp_all_configs.end(), std::back_inserter(all_configs));
// shuffle the configs
std::random_device rd{};
auto rng = std::default_random_engine{rd()};
std::shuffle(all_configs.begin(), all_configs.end(), rng);
const std::size_t n_runs_total = std::min(all_configs.size(), GetTuningIterationsMax());
all_configs.resize(n_runs_total);

bool is_passed = false; // left false only if all iterations failed.
float best_time = std::numeric_limits<float>::max();
Expand All @@ -335,45 +386,56 @@ auto GenericSearch(const Solver s,
HeartBeat<PerformanceConfig> heartbeat;
heartbeat.Start();

if(!miopen::IsCacheDisabled()) // Otherwise precompilation is useless.
const auto total_threads = GetTuningThreadsMax();

ThreadSafeQueue<std::tuple<PerformanceConfig, ConvSolution, bool>> solution_queue;
std::vector<std::thread> compile_agents;
compile_agents.reserve(total_threads);
for(auto idx = 0; idx < total_threads; ++idx)
{
std::vector<KernelInfo> kernels;
size_t n_current = 0;
for(const auto& current_config : all_configs)
{
if(n_current >= n_runs_total)
break;
ConvSolution current_solution = s.GetSolution(context, problem, current_config);
for(auto&& kernel : current_solution.construction_params)
{
if(profile_h.HasProgram(kernel.kernel_file, kernel.comp_options))
continue;
kernels.push_back(kernel);
}
++n_current;
}
std::ignore = PrecompileKernels(profile_h, kernels);
compile_agents.emplace_back(CompileAgent<PerformanceConfig, Solver, Context, Problem>,
idx,
total_threads,
std::cref(s),
std::cref(context),
std::cref(problem),
std::ref(all_configs),
std::ref(solution_queue));
}

if(!IsEnabled(MIOPEN_DEBUG_COMPILE_ONLY{}))
{
size_t n_current = 0;
for(const auto& current_config : all_configs)
size_t n_current = 0;
auto threads_remaining = total_threads;
while(true)
{
if(n_current >= n_runs_total)
break;
MIOPEN_LOG_I2("Waiting for item in queue");
const auto kinder = solution_queue.pop();
auto current_config = std::get<0>(kinder);
auto current_solution = std::get<1>(kinder);

if(std::get<2>(kinder))
{
threads_remaining--;
if(threads_remaining == 0)
break;
else
{
continue;
}
}

float elapsed_time = 0.0f;
int ret = 0;
MIOPEN_LOG_I2('#' << n_current << '/' << n_failed << '/' << n_runs_total << ' '
<< current_config);

ConvSolution current_solution;
Invoker invoker;

try
{
current_solution = s.GetSolution(context, problem, current_config);
if(default_solution.workspace_sz != current_solution.workspace_sz)
{
ret = -2;
Expand Down Expand Up @@ -472,6 +534,9 @@ auto GenericSearch(const Solver s,
"Running kernels on GPU is disabled. Search skipped");
}

for(auto& agent : compile_agents)
agent.join();

MIOPEN_LOG_W("Done: " << n_runs_total << '/' << n_failed << '/' << n_runs_total << ", best #"
<< n_best << ' ' << best_time << ' ' << best_config);

Expand Down
39 changes: 39 additions & 0 deletions src/include/miopen/generic_search_controls.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once
#include <miopen/env.hpp>

namespace miopen {
namespace solver {

/// Upper bound on the number of tuning iterations; read by GetTuningIterationsMax().
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX)
/// Tuning time budget in milliseconds; read by GetTuningTimeMax().
MIOPEN_DECLARE_ENV_VAR(MIOPEN_TUNING_TIME_MS_MAX)
/// Number of compilation threads; read by GetTuningThreadsMax().
MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL)
/// When enabled, GenericSearch does not run kernels on the GPU and skips the search.
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY)

} // namespace solver
} // namespace miopen
59 changes: 59 additions & 0 deletions src/include/miopen/mt_queue.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <condition_variable>
#include <mutex>
#include <queue>
#include <utility>

/// Unbounded FIFO queue whose push() and pop() are serialized by an internal
/// mutex, with pop() blocking until an element is available.
///
/// Elements are moved into and out of the queue, so T only needs to be
/// move-constructible and no deep copy is made on either side.
template <typename T>
class ThreadSafeQueue
{
    std::mutex mutex;
    std::condition_variable cond_var;
    std::queue<T> queue;

public:
    /// Appends \p item to the back of the queue and wakes one waiting consumer.
    /// \p item is left in a moved-from state.
    void push(T&& item)
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            // A named rvalue reference is an lvalue; without std::move this copies.
            queue.push(std::move(item));
        }
        // Notify after releasing the lock so the woken thread can acquire it at once.
        cond_var.notify_one();
    }

    /// Removes and returns the front element, blocking while the queue is empty.
    T pop()
    {
        std::unique_lock<std::mutex> lock(mutex);
        // The predicate form re-checks the condition after spurious wake-ups.
        cond_var.wait(lock, [&] { return !queue.empty(); });
        T ret = std::move(queue.front());
        queue.pop();
        return ret;
    }
};
25 changes: 25 additions & 0 deletions src/include/miopen/sqlite_db.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class path;

namespace miopen {
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_SQL_WAL)
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_PERFDB_OVERRIDE)

constexpr bool InMemDb = MIOPEN_EMBED_DB;
#if MIOPEN_ENABLE_SQLITE_BACKOFF
Expand Down Expand Up @@ -438,6 +439,30 @@ class SQLitePerfDb : public SQLiteBase<SQLitePerfDb>
{
if(dbInvalid)
return boost::none;

const auto pdb_ovr = miopen::GetStringEnv(MIOPEN_DEBUG_PERFDB_OVERRIDE{});
if(pdb_ovr != nullptr)
{
MIOPEN_LOG_I2("overriding tuning params with: " << pdb_ovr);
DbRecord ovr_rec;
const auto solv_vals = SplitDelim(pdb_ovr, ':');
bool success = true;
for(const auto& solv_val : solv_vals)
{
const auto vals = SplitDelim(solv_val, ';');
if(vals.size() != 2)
{
MIOPEN_LOG_W("Invalid value for MIOPEN_DEBUG_PERFDB_OVERRIDE. Format: "
"<solver1_name>;<params>:<solver2_name>;params");
success = false;
break;
}
MIOPEN_LOG_I2("Inserting Overriding PDB entry: " << vals[0] << ";" << vals[1]);
ovr_rec.SetValues(vals.at(0), vals.at(1));
}
if(success)
return {ovr_rec};
}
std::string clause;
std::vector<std::string> values;
std::tie(clause, values) = problem_config.WhereClause();
Expand Down
5 changes: 2 additions & 3 deletions src/solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
namespace miopen {
namespace solver {

MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL)

std::ostream& operator<<(std::ostream& os, const KernelInfo& k)
{
os << k.kernel_file << ", " << k.kernel_name << " g_wk={ ";
Expand All @@ -65,7 +63,8 @@ std::vector<Program> PrecompileKernels(const Handle& h, const std::vector<Kernel

// clang-format off
par_for_strided(kernels.size(),
max_threads{Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}, 20)},
// max_threads{Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}, 20)},
max_threads{GetTuningThreadsMax()},
[&](auto i) {
const KernelInfo& k = kernels[i];
programs[i] = h.LoadProgram(k.kernel_file, k.comp_options, false, "");
Expand Down
Loading

0 comments on commit b4e0a67

Please sign in to comment.