Skip to content

Commit

Permalink
[Fast/Warm restart] Implement helper class for waiting restart done (#…
Browse files Browse the repository at this point in the history
…691)

**Depends on:**

- sonic-net/sonic-sairedis#1100
- sonic-net/sonic-utilities#2286
- sonic-net/sonic-buildimage#11594

**Why I did this?**

Daemons which are not related to warm/fast restart might affect the performance of warm/fast restart. A hardcoded start up delay is the current solution to avoid this.

This PR implements a function that waits for warm/fast restart to finish. It gives daemons an efficient and graceful way to wait until warm/fast restart is done.

**How I did it?**

Implement a utility function RestartWaiter::waitRestartDone. This function waits for the warm restart done flag in STATE DB and returns true once the flag is set by the warm restart finalizer. The function is also exposed as a Python extension so that Python daemons can use it.

This PR depends on new fastboot design: https://github.com/sonic-net/SONiC/blob/master/doc/fast-reboot/Fast-reboot_Flow_Improvements_HLD.md
  • Loading branch information
Junchao-Mellanox authored Oct 18, 2022
1 parent bcf48b2 commit 2cae742
Show file tree
Hide file tree
Showing 9 changed files with 540 additions and 1 deletion.
4 changes: 3 additions & 1 deletion common/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ libswsscommon_la_SOURCES = \
warm_restart.cpp \
luatable.cpp \
countertable.cpp \
redisutility.cpp
redisutility.cpp \
restart_waiter.cpp \
redis_table_waiter.cpp

libswsscommon_la_CXXFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(LIBNL_CFLAGS) $(CODE_COVERAGE_CXXFLAGS)
libswsscommon_la_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(LIBNL_CPPFLAGS) $(CODE_COVERAGE_CPPFLAGS)
Expand Down
145 changes: 145 additions & 0 deletions common/redis_table_waiter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#include "redis_table_waiter.h"

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstring>
#include <limits>

#include "select.h"
#include "subscriberstatetable.h"

using namespace swss;

/**
 * Wait until checkFunc accepts an entry popped from a SubscriberStateTable
 * created on tableName, or until maxWaitSec seconds have elapsed.
 *
 * @param db          Connection handed to the SubscriberStateTable.
 * @param tableName   Name of the table to watch.
 * @param maxWaitSec  Overall time budget in seconds; 0 is rejected.
 * @param checkFunc   Predicate invoked with every popped entry; returning
 *                    true ends the wait.
 * @return true if checkFunc returned true within the budget; false if
 *         maxWaitSec is 0, the budget expired, or select() reported
 *         Select::SIGNALINT.
 */
bool RedisTableWaiter::waitUntil(
    DBConnector &db,
    const std::string &tableName,
    unsigned int maxWaitSec,
    CheckFunc &checkFunc)
{
    if (maxWaitSec == 0)
    {
        SWSS_LOG_ERROR("Error: invalid maxWaitSec value 0, must be larger than 0");
        return false;
    }

    SubscriberStateTable table(&db, tableName);
    Select s;
    s.addSelectable(&table);

    // maxWaitSec * 1000 does not fit in an int once maxWaitSec exceeds
    // INT_MAX / 1000 (about 24.8 days), so the budget is tracked in 64-bit
    // milliseconds and only the per-call select() timeout is clamped to int.
    const long long maxWaitMs = static_cast<long long>(maxWaitSec) * 1000;
    const long long maxSelectMs = std::numeric_limits<int>::max();
    const auto start = std::chrono::steady_clock::now();

    long long remainingMs = maxWaitMs;
    while (true)
    {
        Selectable *sel = nullptr;
        const int selectTimeout = static_cast<int>(std::min(remainingMs, maxSelectMs));
        const int ret = s.select(&sel, selectTimeout, true);
        if (ret == Select::OBJECT)
        {
            KeyOpFieldsValuesTuple kco;
            table.pop(kco);
            if (checkFunc(kco))
            {
                return true;
            }
        }
        else if (ret == Select::ERROR)
        {
            SWSS_LOG_NOTICE("Error: wait redis table got error - %s!", strerror(errno));
        }
        else if (ret == Select::TIMEOUT)
        {
            SWSS_LOG_INFO("Timeout: wait redis table got select timeout");
        }
        else if (ret == Select::SIGNALINT)
        {
            return false;
        }

        // Errors, timeouts and rejected entries all land here: give up once
        // the budget is spent, otherwise retry with whatever time is left.
        const auto now = std::chrono::steady_clock::now();
        const long long elapsedMs = static_cast<long long>(
            std::chrono::duration_cast<std::chrono::milliseconds>(now - start).count());

        if (elapsedMs >= maxWaitMs)
        {
            return false;
        }

        remainingMs = maxWaitMs - elapsedMs;
    }
}

/**
 * Wait until a field of a table entry holds a value accepted by cond.
 *
 * The field is first read directly with hget; if it already satisfies cond
 * the function returns true without waiting. Otherwise waitUntil() is used
 * with a predicate that only looks at SET operations on the given key.
 *
 * @param db          Connection used for the initial read and for waiting.
 * @param tableName   Table that contains the entry.
 * @param key         Key of the entry inside the table.
 * @param fieldName   Field whose value is passed to cond.
 * @param maxWaitSec  Time budget in seconds, forwarded to waitUntil().
 * @param cond        Predicate applied to the field value.
 * @return true if cond accepted the field value; otherwise the result of
 *         waitUntil().
 */
bool RedisTableWaiter::waitUntilFieldSet(
    DBConnector &db,
    const std::string &tableName,
    const std::string &key,
    const std::string &fieldName,
    unsigned int maxWaitSec,
    ConditionFunc &cond)
{
    // Fast path: the field may already have an acceptable value.
    const auto separator = SonicDBConfig::getSeparator(&db);
    const auto current = db.hget(tableName + separator + key, fieldName);
    if (current && cond(*current))
    {
        return true;
    }

    CheckFunc matchesField = [&](const KeyOpFieldsValuesTuple &entry) -> bool {
        // Only SET operations on the requested key are of interest.
        if (kfvOp(entry) != SET_COMMAND || kfvKey(entry) != key)
        {
            return false;
        }

        // The first occurrence of the field decides the outcome.
        for (const auto &fv : kfvFieldsValues(entry))
        {
            if (fvField(fv) == fieldName)
            {
                return cond(fvValue(fv));
            }
        }

        return false;
    };

    return waitUntil(db, tableName, maxWaitSec, matchesField);
}

/**
 * Wait until the given key exists in the table.
 *
 * Returns true immediately if db.exists() already reports the key;
 * otherwise waits through waitUntil() for a SET operation on that key.
 *
 * @param db          Connection used for the existence check and for waiting.
 * @param tableName   Table that should contain the key.
 * @param key         Key to wait for.
 * @param maxWaitSec  Time budget in seconds, forwarded to waitUntil().
 * @return true if the key exists or a SET on it was observed in time;
 *         otherwise the result of waitUntil().
 */
bool RedisTableWaiter::waitUntilKeySet(
    DBConnector &db,
    const std::string &tableName,
    const std::string &key,
    unsigned int maxWaitSec)
{
    const std::string redisKey = tableName + SonicDBConfig::getSeparator(&db) + key;
    if (db.exists(redisKey))
    {
        return true;
    }

    CheckFunc isTargetSet = [&key](const KeyOpFieldsValuesTuple &entry) -> bool {
        return kfvOp(entry) == SET_COMMAND && kfvKey(entry) == key;
    };

    return waitUntil(db, tableName, maxWaitSec, isTargetSet);
}

/**
 * Wait until the given key is absent from the table.
 *
 * Returns true immediately if db.exists() reports the key as missing;
 * otherwise waits through waitUntil() for a DEL operation on that key.
 *
 * @param db          Connection used for the existence check and for waiting.
 * @param tableName   Table that contains the key.
 * @param key         Key expected to be deleted.
 * @param maxWaitSec  Time budget in seconds, forwarded to waitUntil().
 * @return true if the key is absent or a DEL on it was observed in time;
 *         otherwise the result of waitUntil().
 */
bool RedisTableWaiter::waitUntilKeyDel(
    DBConnector &db,
    const std::string &tableName,
    const std::string &key,
    unsigned int maxWaitSec)
{
    const std::string redisKey = tableName + SonicDBConfig::getSeparator(&db) + key;
    if (!db.exists(redisKey))
    {
        return true;
    }

    CheckFunc isTargetDeleted = [&key](const KeyOpFieldsValuesTuple &entry) -> bool {
        return kfvOp(entry) == DEL_COMMAND && kfvKey(entry) == key;
    };

    return waitUntil(db, tableName, maxWaitSec, isTargetDeleted);
}
43 changes: 43 additions & 0 deletions common/redis_table_waiter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include <functional>
#include <string>

#include "dbconnector.h"

namespace swss
{

/**
 * Collection of static helpers that wait, with a time limit given in
 * seconds, for a condition on a Redis table.
 */
class RedisTableWaiter
{
public:
    /// Predicate over a single field value.
    using ConditionFunc = std::function<bool(const std::string &)>;

    /// Predicate over a whole table entry (key, operation, field/value list).
    using CheckFunc = std::function<bool(const KeyOpFieldsValuesTuple &)>;

    /// Generic primitive: wait until checkFunc accepts an entry of tableName
    /// or maxWaitSec seconds have passed.
    static bool waitUntil(
        DBConnector &db,
        const std::string &tableName,
        unsigned int maxWaitSec,
        CheckFunc &checkFunc);

    /// Wait until field fieldName of entry key in tableName satisfies cond.
    static bool waitUntilFieldSet(
        DBConnector &db,
        const std::string &tableName,
        const std::string &key,
        const std::string &fieldName,
        unsigned int maxWaitSec,
        ConditionFunc &cond);

    /// Wait until entry key exists in tableName.
    static bool waitUntilKeySet(
        DBConnector &db,
        const std::string &tableName,
        const std::string &key,
        unsigned int maxWaitSec);

    /// Wait until entry key no longer exists in tableName.
    static bool waitUntilKeyDel(
        DBConnector &db,
        const std::string &tableName,
        const std::string &key,
        unsigned int maxWaitSec);
};

}
92 changes: 92 additions & 0 deletions common/restart_waiter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#include "restart_waiter.h"
#include "redis_table_waiter.h"
#include "redispipeline.h"
#include "schema.h"
#include <boost/algorithm/string.hpp>
#include <string>

using namespace swss;

// File-local names of the database, key separator, key, field and table
// used by the restart waiter functions in this file.
namespace
{
const std::string STATE_DB_NAME{"STATE_DB"};
const std::string STATE_DB_SEPARATOR{"|"};
const std::string RESTART_KEY{"system"};
const std::string RESTART_ENABLE_FIELD{"enable"};
const std::string FAST_REBOOT_TABLE_NAME{"FAST_REBOOT"};
} // namespace

/**
 * Wait for an advanced boot (as reported by isAdvancedBootInProgress) to finish.
 *
 * Opens its own STATE_DB connection. Returns true straight away when no
 * advanced boot is in progress; otherwise returns the result of doWait().
 *
 * @param maxWaitSec  Time budget in seconds passed to doWait().
 * @param dbTimeout   Timeout passed to the DBConnector constructor.
 * @param isTcpConn   Connection-type flag passed to the DBConnector constructor.
 */
bool RestartWaiter::waitAdvancedBootDone(
    unsigned int maxWaitSec,
    unsigned int dbTimeout,
    bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    if (!isAdvancedBootInProgress(stateDb))
    {
        // Nothing to wait for.
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

/**
 * Wait for a warm boot to finish.
 *
 * Opens its own STATE_DB connection. Returns true straight away when a fast
 * boot is in progress or when no advanced boot is in progress; otherwise
 * returns the result of doWait().
 *
 * @param maxWaitSec  Time budget in seconds passed to doWait().
 * @param dbTimeout   Timeout passed to the DBConnector constructor.
 * @param isTcpConn   Connection-type flag passed to the DBConnector constructor.
 */
bool RestartWaiter::waitWarmBootDone(
    unsigned int maxWaitSec,
    unsigned int dbTimeout,
    bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    // The fast boot check comes first; the advanced boot flag is only read
    // when this is not a fast boot.
    if (isFastBootInProgress(stateDb) || !isAdvancedBootInProgress(stateDb))
    {
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

/**
 * Wait for a fast boot to finish.
 *
 * Opens its own STATE_DB connection. Returns true straight away when no fast
 * boot is in progress or when no advanced boot is in progress; otherwise
 * returns the result of doWait().
 *
 * @param maxWaitSec  Time budget in seconds passed to doWait().
 * @param dbTimeout   Timeout passed to the DBConnector constructor.
 * @param isTcpConn   Connection-type flag passed to the DBConnector constructor.
 */
bool RestartWaiter::waitFastBootDone(
    unsigned int maxWaitSec,
    unsigned int dbTimeout,
    bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    // The fast boot check comes first; the advanced boot flag is only read
    // when a fast boot is in progress.
    if (!isFastBootInProgress(stateDb) || !isAdvancedBootInProgress(stateDb))
    {
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

/**
 * Wait until the "enable" field of the "system" entry in
 * STATE_WARM_RESTART_ENABLE_TABLE_NAME reads "false" (case-insensitive).
 *
 * @param stateDb     Connection forwarded to RedisTableWaiter.
 * @param maxWaitSec  Time budget in seconds forwarded to RedisTableWaiter.
 * @return the result of RedisTableWaiter::waitUntilFieldSet().
 */
bool RestartWaiter::doWait(DBConnector &stateDb,
                           unsigned int maxWaitSec)
{
    RedisTableWaiter::ConditionFunc restartDisabled = [](const std::string &value) -> bool {
        return boost::to_lower_copy(value) == "false";
    };

    return RedisTableWaiter::waitUntilFieldSet(
        stateDb,
        STATE_WARM_RESTART_ENABLE_TABLE_NAME,
        RESTART_KEY,
        RESTART_ENABLE_FIELD,
        maxWaitSec,
        restartDisabled);
}

/**
 * Report whether the "enable" field of the "system" entry in
 * STATE_WARM_RESTART_ENABLE_TABLE_NAME reads "true" (case-insensitive).
 *
 * @param stateDb  Connection used to read the field.
 * @return false when the field is missing or holds any other value.
 */
bool RestartWaiter::isAdvancedBootInProgress(DBConnector &stateDb)
{
    const std::string redisKey =
        STATE_WARM_RESTART_ENABLE_TABLE_NAME + STATE_DB_SEPARATOR + RESTART_KEY;
    const auto flag = stateDb.hget(redisKey, RESTART_ENABLE_FIELD);
    if (!flag)
    {
        return false;
    }

    return boost::to_lower_copy(*flag) == "true";
}

/**
 * Report whether the "FAST_REBOOT|system" key can be read from the database.
 *
 * @param stateDb  Connection used to read the key.
 * @return true when get() yields a value, false when it yields null.
 */
bool RestartWaiter::isFastBootInProgress(DBConnector &stateDb)
{
    const auto marker = stateDb.get(FAST_REBOOT_TABLE_NAME + STATE_DB_SEPARATOR + RESTART_KEY);
    return static_cast<bool>(marker);
}

/**
 * Report whether a warm boot is in progress: an advanced boot is in
 * progress and it is not a fast boot.
 *
 * @param stateDb  Connection used for both checks.
 */
bool RestartWaiter::isWarmBootInProgress(swss::DBConnector &stateDb)
{
    if (!isAdvancedBootInProgress(stateDb))
    {
        return false;
    }

    return !isFastBootInProgress(stateDb);
}
33 changes: 33 additions & 0 deletions common/restart_waiter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include "dbconnector.h"

namespace swss
{

// Helper class to wait for warm/fast reboot done
/**
 * Static helpers for daemons that need to wait until a warm/fast reboot has
 * finished, plus queries for the current boot state.
 *
 * Every wait* function takes the maximum wait in seconds (default 180), a
 * database connection timeout (default 0) and a flag selecting a TCP
 * connection (default false).
 */
class RestartWaiter
{
public:
    // Boot state queries on an existing state database connection.
    static bool isAdvancedBootInProgress(swss::DBConnector &stateDb);
    static bool isFastBootInProgress(swss::DBConnector &stateDb);
    static bool isWarmBootInProgress(swss::DBConnector &stateDb);

    // Waits for the restart-enable flag to clear while an advanced boot is in progress.
    static bool waitAdvancedBootDone(unsigned int maxWaitSec = 180,
                                     unsigned int dbTimeout = 0,
                                     bool isTcpConn = false);

    // Same wait, but returns immediately when a fast boot is in progress.
    static bool waitWarmBootDone(unsigned int maxWaitSec = 180,
                                 unsigned int dbTimeout = 0,
                                 bool isTcpConn = false);

    // Same wait, but returns immediately when no fast boot is in progress.
    static bool waitFastBootDone(unsigned int maxWaitSec = 180,
                                 unsigned int dbTimeout = 0,
                                 bool isTcpConn = false);

private:
    // Shared wait implementation used by the public wait* functions.
    static bool doWait(swss::DBConnector &stateDb,
                       unsigned int maxWaitSec);
};

}
4 changes: 4 additions & 0 deletions pyext/swsscommon.i
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#include "events.h"
#include "configdb.h"
#include "status_code_util.h"
#include "redis_table_waiter.h"
#include "restart_waiter.h"
%}

%include <std_string.i>
Expand Down Expand Up @@ -221,3 +223,5 @@ T castSelectableObj(swss::Selectable *temp)
%include "events.h"

%include "status_code_util.h"
#include "redis_table_waiter.h"
%include "restart_waiter.h"
2 changes: 2 additions & 0 deletions tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ tests_SOURCES = redis_ut.cpp \
events_common_ut.cpp \
events_service_ut.cpp \
events_ut.cpp \
restart_waiter_ut.cpp \
redis_table_waiter_ut.cpp \
main.cpp

tests_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(LIBNL_CFLAGS)
Expand Down
Loading

0 comments on commit 2cae742

Please sign in to comment.