Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warm reboot: Orchagent state restore #554

Merged
merged 12 commits into from
Aug 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions orchagent/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
INCLUDES = -I $(top_srcdir)
INCLUDES = -I $(top_srcdir) -I $(top_srcdir)/warmrestart

CFLAGS_SAI = -I /usr/include/sai

Expand Down Expand Up @@ -66,12 +66,14 @@ orchagent_SOURCES = \
switchorch.h \
swssnet.h \
tunneldecaporch.h \
crmorch.h
crmorch.h \
request_parser.h \
vrforch.h \
dtelorch.h \
countercheckorch.h \
flexcounterorch.h
flexcounterorch.h \
$(top_srcdir)/warmrestart/warm_restart.cpp \
$(top_srcdir)/warmrestart/warm_restart.h

orchagent_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
orchagent_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
Expand Down
14 changes: 7 additions & 7 deletions orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,7 @@ bool AclRuleDTelFlowWatchListEntry::validateAddAction(string attr_name, string a
sai_object_id_t session_oid;

if (!m_pDTelOrch ||
(attr_name != ACTION_DTEL_FLOW_OP &&
(attr_name != ACTION_DTEL_FLOW_OP &&
attr_name != ACTION_DTEL_INT_SESSION &&
attr_name != ACTION_DTEL_FLOW_SAMPLE_PERCENT &&
attr_name != ACTION_DTEL_REPORT_ALL_PACKETS))
Expand Down Expand Up @@ -1356,7 +1356,7 @@ bool AclRuleDTelFlowWatchListEntry::validateAddAction(string attr_name, string a
value.aclaction.parameter.booldata = (attr_value == DTEL_ENABLED) ? true : false;
value.aclaction.enable = (attr_value == DTEL_ENABLED) ? true : false;
}

m_actions[aclDTelActionLookup[attr_name]] = value;

return true;
Expand Down Expand Up @@ -1514,7 +1514,7 @@ bool AclRuleDTelDropWatchListEntry::validateAddAction(string attr_name, string a

value.aclaction.parameter.booldata = (attr_value == DTEL_ENABLED) ? true : false;
value.aclaction.enable = (attr_value == DTEL_ENABLED) ? true : false;

m_actions[aclDTelActionLookup[attr_name]] = value;

return true;
Expand Down Expand Up @@ -1693,8 +1693,8 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
// initialized before thread start.
auto interv = timespec { .tv_sec = COUNTERS_READ_INTERVAL, .tv_nsec = 0 };
auto timer = new SelectableTimer(interv);
auto executor = new ExecutableTimer(timer, this);
Orch::addExecutor("", executor);
auto executor = new ExecutableTimer(timer, this, "ACL_POLL_TIMER");
Orch::addExecutor(executor);
timer->start();
}

Expand Down Expand Up @@ -2320,7 +2320,7 @@ sai_status_t AclOrch::bindAclTable(sai_object_id_t table_oid, AclTable &aclTable
sai_status_t status = SAI_STATUS_SUCCESS;

SWSS_LOG_INFO("%s table %s to ports", bind ? "Bind" : "Unbind", aclTable.id.c_str());

if (aclTable.ports.empty())
{
if (bind)
Expand Down Expand Up @@ -2431,7 +2431,7 @@ sai_status_t AclOrch::createDTelWatchListTables()
SWSS_LOG_INFO("Successfully created ACL table %s, oid: %lX", flowWLTable.description.c_str(), table_oid);

/* Create Drop watchlist ACL table */

table_attrs.clear();

dropWLTable.id = TABLE_TYPE_DTEL_DROP_WATCHLIST;
Expand Down
4 changes: 2 additions & 2 deletions orchagent/countercheckorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ CounterCheckOrch::CounterCheckOrch(DBConnector *db, vector<string> &tableNames):

auto interv = timespec { .tv_sec = COUNTER_CHECK_POLL_TIMEOUT_SEC, .tv_nsec = 0 };
auto timer = new SelectableTimer(interv);
auto executor = new ExecutableTimer(timer, this);
Orch::addExecutor("MC_COUNTERS_POLL", executor);
auto executor = new ExecutableTimer(timer, this, "MC_COUNTERS_POLL");
Orch::addExecutor(executor);
timer->start();
}

Expand Down
9 changes: 6 additions & 3 deletions orchagent/crmorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,11 @@ CrmOrch::CrmOrch(DBConnector *db, string tableName):
m_resourcesMap.emplace(res.first, CrmResourceEntry(res.second, CRM_THRESHOLD_TYPE_DEFAULT, CRM_THRESHOLD_LOW_DEFAULT, CRM_THRESHOLD_HIGH_DEFAULT));
}

auto executor = new ExecutableTimer(m_timer.get(), this);
Orch::addExecutor("CRM_COUNTERS_POLL", executor);
// The CRM stats needs to be populated again
m_countersCrmTable->del(CRM_COUNTERS_TABLE_KEY);
Copy link
Contributor

@qiluo-msft qiluo-msft Aug 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

m_countersCrmTable->del(CRM_COUNTERS_TABLE_KEY); [](start = 4, length = 48)

If it is applied to cold start, let's do it in another PR. #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For cold start, no data exists for CRM stats table, so it has no effect.


auto executor = new ExecutableTimer(m_timer.get(), this, "CRM_COUNTERS_POLL");
Orch::addExecutor(executor);
m_timer->start();
}

Expand Down Expand Up @@ -333,7 +336,7 @@ void CrmOrch::decCrmAclUsedCounter(CrmResourceType resource, sai_acl_stage_t sta
{
m_resourcesMap.at(resource).countersMap[getCrmAclKey(stage, point)].usedCounter--;

// Remove ACL table related counters
// Remove ACL table related counters
if (resource == CrmResourceType::CRM_ACL_TABLE)
{
auto & cntMap = m_resourcesMap.at(CrmResourceType::CRM_ACL_TABLE).countersMap;
Expand Down
16 changes: 12 additions & 4 deletions orchagent/fdborch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ FdbOrch::FdbOrch(DBConnector *db, string tableName, PortsOrch *port) :
{
m_portsOrch->attach(this);
m_flushNotificationsConsumer = new NotificationConsumer(db, "FLUSHFDBREQUEST");
auto flushNotifier = new Notifier(m_flushNotificationsConsumer, this);
Orch::addExecutor("", flushNotifier);
auto flushNotifier = new Notifier(m_flushNotificationsConsumer, this, "FLUSHFDBREQUEST");
Orch::addExecutor(flushNotifier);

/* Add FDB notifications support from ASIC */
DBConnector *notificationsDb = new DBConnector(ASIC_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
m_fdbNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS");
auto fdbNotifier = new Notifier(m_fdbNotificationConsumer, this);
Orch::addExecutor("FDB_NOTIFICATIONS", fdbNotifier);
auto fdbNotifier = new Notifier(m_fdbNotificationConsumer, this, "FDB_NOTIFICATIONS");
Orch::addExecutor(fdbNotifier);
}

void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_object_id_t bridge_port_id)
Expand All @@ -53,6 +53,14 @@ void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_obj
return;
}

// we already have such entries
if (m_entries.find(update.entry) != m_entries.end())
{
SWSS_LOG_INFO("FdbOrch notification: mac %s is already in bv_id 0x%lx",
update.entry.mac.to_string().c_str(), entry->bv_id);
break;
}

update.add = true;

{
Expand Down
30 changes: 21 additions & 9 deletions orchagent/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ extern "C" {
#include "saihelper.h"
#include "notifications.h"
#include <signal.h>
#include "warm_restart.h"

using namespace std;
using namespace swss;
Expand Down Expand Up @@ -78,11 +79,29 @@ void sighup_handler(int signo)
}
}

void syncd_apply_view()
{
SWSS_LOG_NOTICE("Notify syncd APPLY_VIEW");

sai_status_t status;
sai_attribute_t attr;
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW;
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status);
exit(EXIT_FAILURE);
}
}

int main(int argc, char **argv)
{
swss::Logger::linkToDbNative("orchagent");

SWSS_LOG_ENTER();
WarmStart::checkWarmStart("orchagent");

if (signal(SIGHUP, sighup_handler) == SIG_ERR)
{
Expand Down Expand Up @@ -262,16 +281,9 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}

SWSS_LOG_NOTICE("Notify syncd APPLY_VIEW");

attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW;
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);

if (status != SAI_STATUS_SUCCESS)
if (!WarmStart::isWarmStart())
{
SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status);
exit(EXIT_FAILURE);
syncd_apply_view();
}

orchDaemon->start();
Expand Down
4 changes: 2 additions & 2 deletions orchagent/notifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

class Notifier : public Executor {
public:
Notifier(NotificationConsumer *select, Orch *orch)
: Executor(select, orch)
Notifier(NotificationConsumer *select, Orch *orch, const string &name)
: Executor(select, orch, name)
{
}

Expand Down
66 changes: 47 additions & 19 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,30 @@ void Consumer::drain()
m_orch->doTask(*this);
}

string Consumer::dumpTuple(KeyOpFieldsValuesTuple &tuple)
{
string s = getTableName() + getConsumerTable()->getTableNameSeparator() + kfvKey(tuple)
+ "|" + kfvOp(tuple);
Copy link
Contributor

@qiluo-msft qiluo-msft Aug 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

| [](start = 18, length = 1)

getTableNameSeparator() could be '|', do you want to change this '|'? #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This a good question.
We had it fixed to ":" previously. Changed it to getConsumerTable()->getTableNameSeparator() upon comment. I think one of he benefits of doing this is we'll have the same key format as that in redis DB. Any other idea?

for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++)
{
s += "|" + fvField(*i) + ":" + fvValue(*i);
}

return s;
}

void Consumer::dumpPendingTasks(vector<string> &ts)
{
for (auto &tm : m_toSync)
{
KeyOpFieldsValuesTuple& tuple = tm.second;

string s = dumpTuple(tuple);

ts.push_back(s);
}
}

size_t Orch::addExistingData(const string& tableName)
{
auto consumer = dynamic_cast<Consumer *>(getExecutor(tableName));
Expand Down Expand Up @@ -224,7 +248,7 @@ bool Orch::bake()
{
continue;
}

size_t refilled = consumer->refillToSync();
SWSS_LOG_NOTICE("Add warm input: %s, %zd", executorName.c_str(), refilled);
}
Expand Down Expand Up @@ -326,6 +350,21 @@ void Orch::doTask()
}
}

void Orch::dumpPendingTasks(vector<string> &ts)
{
for(auto &it : m_consumerMap)
{
Consumer* consumer = dynamic_cast<Consumer *>(it.second.get());
if (consumer == NULL)
{
SWSS_LOG_DEBUG("Executor is not a Consumer");
continue;
}

consumer->dumpPendingTasks(ts);
}
}

void Orch::logfileReopen()
{
gRecordOfs.close();
Expand All @@ -347,12 +386,7 @@ void Orch::logfileReopen()

void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple)
{
string s = consumer.getTableName() + ":" + kfvKey(tuple)
+ "|" + kfvOp(tuple);
for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++)
{
s += "|" + fvField(*i) + ":" + fvValue(*i);
}
string s = consumer.dumpTuple(tuple);

gRecordOfs << getTimestamp() << "|" << s << endl;

Expand All @@ -366,13 +400,7 @@ void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple)

string Orch::dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple)
{
string s = consumer.getTableName() + ":" + kfvKey(tuple)
+ "|" + kfvOp(tuple);
for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++)
{
s += "|" + fvField(*i) + ":" + fvValue(*i);
}

string s = consumer.dumpTuple(tuple);
return s;
}

Expand Down Expand Up @@ -461,24 +489,24 @@ void Orch::addConsumer(DBConnector *db, string tableName, int pri)
{
if (db->getDbId() == CONFIG_DB || db->getDbId() == STATE_DB)
{
addExecutor(tableName, new Consumer(new SubscriberStateTable(db, tableName, TableConsumable::DEFAULT_POP_BATCH_SIZE, pri), this));
addExecutor(new Consumer(new SubscriberStateTable(db, tableName, TableConsumable::DEFAULT_POP_BATCH_SIZE, pri), this, tableName));
}
else
{
addExecutor(tableName, new Consumer(new ConsumerStateTable(db, tableName, gBatchSize, pri), this));
addExecutor(new Consumer(new ConsumerStateTable(db, tableName, gBatchSize, pri), this, tableName));
}
}

void Orch::addExecutor(string executorName, Executor* executor)
void Orch::addExecutor(Executor* executor)
{
auto inserted = m_consumerMap.emplace(std::piecewise_construct,
std::forward_as_tuple(executorName),
std::forward_as_tuple(executor->getName()),
std::forward_as_tuple(executor));

// If there is duplication of executorName in m_consumerMap, logic error
if (!inserted.second)
{
SWSS_LOG_THROW("Duplicated executorName in m_consumerMap: %s", executorName.c_str());
SWSS_LOG_THROW("Duplicated executorName in m_consumerMap: %s", executor->getName().c_str());
}
}

Expand Down
Loading