From 4b34c1b13b838fb86aaa25716d809c9ac01404aa Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Sun, 29 Jul 2018 22:52:47 -0700 Subject: [PATCH 1/9] Orchagent state restore Signed-off-by: Jipan Yang --- orchagent/Makefile.am | 8 +- orchagent/aclorch.cpp | 1 + orchagent/countercheckorch.cpp | 1 + orchagent/crmorch.cpp | 6 +- orchagent/fdborch.cpp | 11 +++ orchagent/intfsorch.cpp | 16 +++- orchagent/main.cpp | 32 +++++--- orchagent/neighorch.cpp | 4 + orchagent/orch.cpp | 32 ++++++++ orchagent/orch.h | 28 +++++++ orchagent/orchdaemon.cpp | 130 +++++++++++++++++++++++++++++++-- orchagent/orchdaemon.h | 2 + orchagent/pfcwdorch.cpp | 4 +- orchagent/portsorch.cpp | 7 +- orchagent/routeorch.cpp | 5 ++ 15 files changed, 263 insertions(+), 24 deletions(-) diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index e2bd73ff4e..24249c5ff2 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -1,4 +1,4 @@ -INCLUDES = -I $(top_srcdir) +INCLUDES = -I $(top_srcdir) -I $(top_srcdir)/warmrestart CFLAGS_SAI = -I /usr/include/sai @@ -66,12 +66,14 @@ orchagent_SOURCES = \ switchorch.h \ swssnet.h \ tunneldecaporch.h \ - crmorch.h + crmorch.h \ request_parser.h \ vrforch.h \ dtelorch.h \ countercheckorch.h \ - flexcounterorch.h + flexcounterorch.h \ + $(top_srcdir)/warmrestart/warm_restart.cpp \ + $(top_srcdir)/warmrestart/warm_restart.h orchagent_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI) orchagent_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI) diff --git a/orchagent/aclorch.cpp b/orchagent/aclorch.cpp index 1922942d37..68bf36a148 100644 --- a/orchagent/aclorch.cpp +++ b/orchagent/aclorch.cpp @@ -1694,6 +1694,7 @@ void AclOrch::init(vector& connectors, PortsOrch *portOrch, Mirr auto interv = timespec { .tv_sec = COUNTERS_READ_INTERVAL, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); auto executor = new ExecutableTimer(timer, this); + executor->setName("ACL_POLL_TIMER"); Orch::addExecutor("", executor); timer->start(); } diff --git a/orchagent/countercheckorch.cpp b/orchagent/countercheckorch.cpp index f404c07e51..9d2cc25f34 100644 --- a/orchagent/countercheckorch.cpp +++ b/orchagent/countercheckorch.cpp @@ -31,6 +31,7 @@ CounterCheckOrch::CounterCheckOrch(DBConnector *db, vector &tableNames): auto interv = timespec { .tv_sec = COUNTER_CHECK_POLL_TIMEOUT_SEC, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); auto executor = new ExecutableTimer(timer, this); + executor->setName("MC_COUNTERS_POLL"); Orch::addExecutor("MC_COUNTERS_POLL", executor); timer->start(); } diff --git a/orchagent/crmorch.cpp b/orchagent/crmorch.cpp index e7e89eac83..b0b2dfdfd2 100644 --- a/orchagent/crmorch.cpp +++ b/orchagent/crmorch.cpp @@ -162,7 +162,11 @@ CrmOrch::CrmOrch(DBConnector *db, string tableName): m_resourcesMap.emplace(res.first, CrmResourceEntry(res.second, CRM_THRESHOLD_TYPE_DEFAULT, CRM_THRESHOLD_LOW_DEFAULT, CRM_THRESHOLD_HIGH_DEFAULT)); } + // The CRM stats needs to be populated again + m_countersCrmTable->del(CRM_COUNTERS_TABLE_KEY); + auto executor = new ExecutableTimer(m_timer.get(), this); + executor->setName("CRM_COUNTERS_POLL"); Orch::addExecutor("CRM_COUNTERS_POLL", executor); m_timer->start(); } @@ -333,7 +337,7 @@ void CrmOrch::decCrmAclUsedCounter(CrmResourceType resource, sai_acl_stage_t sta { m_resourcesMap.at(resource).countersMap[getCrmAclKey(stage, point)].usedCounter--; - // Remove ACL table related counters + // Remove ACL table related counters if (resource == CrmResourceType::CRM_ACL_TABLE) { auto & cntMap = m_resourcesMap.at(CrmResourceType::CRM_ACL_TABLE).countersMap; diff --git a/orchagent/fdborch.cpp b/orchagent/fdborch.cpp index 7d265d3537..f01f2cf95e 100644 --- a/orchagent/fdborch.cpp +++ b/orchagent/fdborch.cpp @@ -27,13 +27,17 @@ FdbOrch::FdbOrch(DBConnector *db, string tableName, PortsOrch *port) : m_portsOrch->attach(this); m_flushNotificationsConsumer = new NotificationConsumer(db, "FLUSHFDBREQUEST"); auto flushNotifier = new Notifier(m_flushNotificationsConsumer, this); + flushNotifier->setName("FLUSHFDBREQUEST"); Orch::addExecutor("", flushNotifier); /* Add FDB notifications support from ASIC */ DBConnector *notificationsDb = new DBConnector(ASIC_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); m_fdbNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS"); auto fdbNotifier = new Notifier(m_fdbNotificationConsumer, this); + fdbNotifier->setName("FDB_NOTIFICATIONS"); Orch::addExecutor("FDB_NOTIFICATIONS", fdbNotifier); + + addExistingData(tableName); } void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_object_id_t bridge_port_id) @@ -53,6 +57,13 @@ void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_obj return; } + if (m_entries.count(update.entry) != 0) // we already have such entries + { + SWSS_LOG_INFO("FdbOrch notification: mac %s is already in bv_id 0x%lx", + update.entry.mac.to_string().c_str(), entry->bv_id); + break; + } + update.add = true; { diff --git a/orchagent/intfsorch.cpp b/orchagent/intfsorch.cpp index a49b52ec4e..e7f7d6cd0d 100644 --- a/orchagent/intfsorch.cpp +++ b/orchagent/intfsorch.cpp @@ -29,6 +29,10 @@ IntfsOrch::IntfsOrch(DBConnector *db, string tableName) : Orch(db, tableName, intfsorch_pri) { SWSS_LOG_ENTER(); + + // Read the pre-existing data for INTF in appDB. + addExistingData(APP_INTF_TABLE_NAME); + } sai_object_id_t IntfsOrch::getRouterIntfsId(const string &alias) @@ -86,7 +90,16 @@ void IntfsOrch::doTask(Consumer &consumer) { if (alias == "lo") { - addIp2MeRoute(ip_prefix); + // set request for lo may come after warm start restore + auto it_intfs = m_syncdIntfses.find(alias); + if (it_intfs == m_syncdIntfses.end()) + { + IntfsEntry intfs_entry; + intfs_entry.ref_count = 0; + m_syncdIntfses[alias] = intfs_entry; + addIp2MeRoute(ip_prefix); + } + it = consumer.m_toSync.erase(it); continue; } @@ -171,6 +184,7 @@ void IntfsOrch::doTask(Consumer &consumer) { if (alias == "lo") { + m_syncdIntfses.erase(alias); removeIp2MeRoute(ip_prefix); it = consumer.m_toSync.erase(it); continue; diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 530fa016eb..be4a7e3df2 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -22,6 +22,7 @@ extern "C" { #include "saihelper.h" #include "notifications.h" #include +#include using namespace std; using namespace swss; @@ -77,6 +78,23 @@ void sighup_handler(int signo) } } +void syncd_apply_view() +{ + SWSS_LOG_NOTICE("Notify syncd APPLY_VIEW"); + + sai_status_t status; + sai_attribute_t attr; + attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD; + attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW; + status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status); + exit(EXIT_FAILURE); + } +} + int main(int argc, char **argv) { swss::Logger::linkToDbNative("orchagent"); @@ -251,6 +269,8 @@ int main(int argc, char **argv) DBConnector config_db(CONFIG_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); DBConnector state_db(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); + WarmStart::checkWarmStart("orchagent"); + OrchDaemon *orchDaemon = new OrchDaemon(&appl_db, &config_db, &state_db); if (!orchDaemon->init()) { @@ -261,17 +281,9 @@ int main(int argc, char **argv) try { - SWSS_LOG_NOTICE("Notify syncd APPLY_VIEW"); - - attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD; - attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW; - status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); - - if (status != SAI_STATUS_SUCCESS) + if (!WarmStart::isWarmStart()) { - SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status); - delete orchDaemon; - exit(EXIT_FAILURE); + syncd_apply_view(); } orchDaemon->start(); diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index 585f91b967..e0432bb414 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -19,6 +19,10 @@ NeighOrch::NeighOrch(DBConnector *db, string tableName, IntfsOrch *intfsOrch) : Orch(db, tableName, neighorch_pri), m_intfsOrch(intfsOrch) { SWSS_LOG_ENTER(); + + // Read the pre-existing data for neigh in appDB. + addExistingData(tableName); + } bool NeighOrch::hasNextHop(IpAddress ipAddress) diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index f86dd849ce..0293155198 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -171,6 +171,23 @@ void Consumer::drain() m_orch->doTask(*this); } +void Consumer::dumpTasks(vector &ts) +{ + for (auto &tm :m_toSync) + { + KeyOpFieldsValuesTuple& tuple = tm.second; + + string s = getTableName() + ":" + kfvKey(tuple) + + "|" + kfvOp(tuple); + for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) + { + s += "|" + fvField(*i) + ":" + fvValue(*i); + } + + ts.push_back(s); + } +} + bool Orch::addExistingData(const string& tableName) { Consumer* consumer = dynamic_cast(getExecutor(tableName)); @@ -293,6 +310,21 @@ void Orch::doTask() } } +void Orch::dumpTasks(vector &ts) +{ + for(auto &it : m_consumerMap) + { + Consumer* consumer = dynamic_cast(it.second.get()); + if (consumer == NULL) + { + SWSS_LOG_DEBUG("Executor is not a Consumer"); + continue; + } + + consumer->dumpTasks(ts); + } +} + void Orch::logfileReopen() { gRecordOfs.close(); diff --git a/orchagent/orch.h b/orchagent/orch.h index 902192f16b..156a0fee6e 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -91,10 +91,22 @@ class Executor : public Selectable virtual void execute() { } virtual void drain() { } + virtual string getName() const + { + return m_name; + } + virtual void setName(string name) + { + m_name = name; + } + protected: Selectable *m_selectable; Orch *m_orch; + // Name for Executor + string m_name; + // Get the underlying selectable Selectable *getSelectable() const { return m_selectable; } }; @@ -116,10 +128,24 @@ class Consumer : public Executor { return getConsumerTable()->getTableName(); } + + string getName() const + { + return getConsumerTable()->getTableName(); + } + + int getDbId() const + { + return getConsumerTable()->getDbId(); + } + + void dumpTasks(vector &ts); + void addToSync(std::deque &entries); void refillToSync(); void refillToSync(Table* table); void execute(); + void drain(); /* Store the latest 'golden' status */ @@ -166,6 +192,8 @@ class Orch /* TODO: refactor recording */ static void recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple); + + void dumpTasks(vector &ts); protected: ConsumerMap m_consumerMap; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 26bd446da2..c4f1ee7069 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -3,6 +3,7 @@ #include "orchdaemon.h" #include "logger.h" #include +#include "warm_restart.h" #define SAI_SWITCH_ATTR_CUSTOM_RANGE_BASE SAI_SWITCH_ATTR_CUSTOM_RANGE_START #include "sairedis.h" @@ -16,7 +17,7 @@ using namespace swss; extern sai_switch_api_t* sai_switch_api; extern sai_object_id_t gSwitchId; - +extern void syncd_apply_view(); /* * Global orch daemon variables */ @@ -27,6 +28,7 @@ RouteOrch *gRouteOrch; AclOrch *gAclOrch; CrmOrch *gCrmOrch; BufferOrch *gBufferOrch; +SwitchOrch *gSwitchOrch; OrchDaemon::OrchDaemon(DBConnector *applDb, DBConnector *configDb, DBConnector *stateDb) : m_applDb(applDb), @@ -49,7 +51,7 @@ bool OrchDaemon::init() string platform = getenv("platform") ? getenv("platform") : ""; - SwitchOrch *switch_orch = new SwitchOrch(m_applDb, APP_SWITCH_TABLE_NAME); + gSwitchOrch = new SwitchOrch(m_applDb, APP_SWITCH_TABLE_NAME); const int portsorch_base_pri = 40; @@ -116,7 +118,15 @@ bool OrchDaemon::init() CFG_DTEL_EVENT_TABLE_NAME }; - m_orchList = { switch_orch, gCrmOrch, gBufferOrch, gPortsOrch, intfs_orch, gNeighOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, mirror_orch }; + /* + * The order of the orch list is important for state restore of warm start and + * the queued processing in m_toSync map after gPortsOrch->isInitDone() is set. + * + * For the multiple consumers in ports_tables, tasks for LAG_TABLE is processed before VLAN_TABLE + * when iterating ConsumerMap. + * That is ensured implicitly by the order of map key, "LAG_TABLE" is smaller than "VLAN_TABLE" in lexicographic order. + */ + m_orchList = { gSwitchOrch, gCrmOrch, gBufferOrch, gPortsOrch, intfs_orch, gNeighOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch}; bool initialize_dtel = false; if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING) @@ -152,13 +162,13 @@ bool OrchDaemon::init() gAclOrch = new AclOrch(acl_table_connectors, gPortsOrch, mirror_orch, gNeighOrch, gRouteOrch); } - m_orchList.push_back(gAclOrch); m_orchList.push_back(gFdbOrch); + m_orchList.push_back(mirror_orch); + m_orchList.push_back(gAclOrch); m_orchList.push_back(vrf_orch); m_select = new Select(); - vector flex_counter_tables = { CFG_FLEX_COUNTER_TABLE_NAME }; @@ -279,6 +289,16 @@ void OrchDaemon::start() m_select->addSelectables(o->getSelectables()); } + bool restored = true; + // executorSet stores all Executors which have data/task to be processed + // after state restore phase of warm start. + set executorSet; + if (WarmStart::isWarmStart()) + { + restored = false; + WarmStart::setWarmStartState("orchagent", WarmStart::INIT); + } + while (true) { Selectable *s; @@ -304,7 +324,32 @@ void OrchDaemon::start() } auto *c = (Executor *)s; - c->execute(); + + if (restored) + { + c->execute(); + } + else + { + /* + * Don't process any new data other than those from ConfigDB + * before state restore is finished. + * stateDbLagTable is a special case, create/delete of LAG is controlled + * from configDB. It is assumed that no configDB change during warm restart. + */ + + Consumer* consumer = dynamic_cast(c); + if (consumer != NULL && (consumer->getDbId() == CONFIG_DB || consumer->getDbId() == STATE_DB)) + { + c->execute(); + } + else if (executorSet.find(c) == executorSet.end()) + { + executorSet.insert(c); + SWSS_LOG_NOTICE("Task for executor %s is being postponed after state restore", + c->getName().c_str()); + } + } /* After each iteration, periodically check all m_toSync map to * execute all the remaining tasks that need to be retried. */ @@ -313,5 +358,78 @@ void OrchDaemon::start() for (Orch *o : m_orchList) o->doTask(); + /* + * All data to be restored have been added to m_toSync of each orch + * at contructor phase. And the order of m_orchList guranteed the + * dependency of tasks had been met, restore is done. + */ + if (!restored && m_select->isQueueEmpty() && gPortsOrch->isInitDone()) + { + /* + * drain remaining data that are out of order like LAG_MEMBER_TABLE and VLAN_MEMBER_TABLE + * since they were checked before LAG_TABLE and VLAN_TABLE. + */ + for (Orch *o : m_orchList) + { + o->doTask(); + } + + warmRestoreValidation(); + SWSS_LOG_NOTICE("Orchagent state restore done"); + restored = true; + syncd_apply_view(); + + /* Pick up those tasks postponed by restore processing */ + if(!executorSet.empty()) + { + for (Executor *c : executorSet) + { + c->execute(); + } + for (Orch *o : m_orchList) + { + o->doTask(); + } + } + } + } +} + +/* + * Get tasks to sync for consumers of each orch being managed by this orch daemon + */ +void OrchDaemon::getTaskToSync(vector &ts) +{ + for (Orch *o : m_orchList) + { + o->dumpTasks(ts); + } +} + + +/* Perform basic validation after start restore for warm start */ +bool OrchDaemon::warmRestoreValidation() +{ + /* + * No pending task should exist for any of the consumer at this point. + * All the prexisting data in appDB and configDb have been read and processed. + */ + vector ts; + getTaskToSync(ts); + if (ts.size() != 0) + { + // TODO: change it to fatal once staged ProducerStateTable/ConsumerStateTable change and + // pre-warmStart consistency validation are ready. + SWSS_LOG_ERROR("There are pending consumer tasks after restore: "); + for(auto &s : ts) + { + SWSS_LOG_ERROR("%s", s.c_str()); + } + return false; + } + else + { + WarmStart::setWarmStartState("orchagent", WarmStart::RESTORED); + return true; } } diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index 0213c24674..db17c07b8e 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -34,6 +34,8 @@ class OrchDaemon bool init(); void start(); + void getTaskToSync(vector &ts); + bool warmRestoreValidation(); private: DBConnector *m_applDb; DBConnector *m_configDb; diff --git a/orchagent/pfcwdorch.cpp b/orchagent/pfcwdorch.cpp index 5278d76bc7..8ab271226c 100644 --- a/orchagent/pfcwdorch.cpp +++ b/orchagent/pfcwdorch.cpp @@ -619,7 +619,7 @@ PfcWdSwOrch::PfcWdSwOrch( vector &tableNames, const vector &portStatIds, const vector &queueStatIds, - const vector &queueAttrIds, + const vector &queueAttrIds, int pollInterval): PfcWdOrch(db, tableNames), m_flexCounterDb(new DBConnector(FLEX_COUNTER_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)), @@ -669,11 +669,13 @@ PfcWdSwOrch::PfcWdSwOrch( PfcWdSwOrch::getCountersDb().get(), "PFC_WD"); auto wdNotification = new Notifier(consumer, this); + wdNotification->setName("PFC_WD"); Orch::addExecutor("PFC_WD", wdNotification); auto interv = timespec { .tv_sec = COUNTER_CHECK_POLL_TIMEOUT_SEC, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); auto executor = new ExecutableTimer(timer, this); + executor->setName("PFC_WD_COUNTERS_POLL"); Orch::addExecutor("PFC_WD_COUNTERS_POLL", executor); timer->start(); } diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 8d6f58988c..48686cc286 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -244,6 +244,7 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) DBConnector *notificationsDb = new DBConnector(ASIC_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); m_portStatusNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS"); auto portStatusNotificatier = new Notifier(m_portStatusNotificationConsumer, this); + portStatusNotificatier->setName("PORT_STATUS_NOTIFICATIONS"); Orch::addExecutor("PORT_STATUS_NOTIFICATIONS", portStatusNotificatier); // Try warm start @@ -696,7 +697,7 @@ bool PortsOrch::setPortPvid(Port &port, sai_uint32_t pvid) if (port.m_rif_id) { - SWSS_LOG_ERROR("pvid setting for router interface is not allowed"); + SWSS_LOG_ERROR("pvid setting for router interface %s is not allowed", port.m_alias.c_str()); return false; } @@ -1206,7 +1207,9 @@ bool PortsOrch::bake() if (m_portCount != keys.size() - 2) { // Invalid port table - SWSS_LOG_ERROR("Invalid port table: m_portCount"); + SWSS_LOG_ERROR("Invalid port table: m_portCount, expecting %u, got %lu", + m_portCount, keys.size() - 2); + cleanPortTable(keys); return false; } diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index 18e8d61769..be49726303 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -102,6 +102,11 @@ RouteOrch::RouteOrch(DBConnector *db, string tableName, NeighOrch *neighOrch) : m_syncdRoutes[v6_default_ip_prefix] = IpAddresses(); SWSS_LOG_NOTICE("Create IPv6 default route with packet action drop"); + + + // Read the pre-existing data for route in appDB. + addExistingData(tableName); + } bool RouteOrch::hasNextHopGroup(const IpAddresses& ipAddresses) const From 17398e8981fb4f9e1e52e0734ecdad11e77b9e7a Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Thu, 2 Aug 2018 16:19:04 -0700 Subject: [PATCH 2/9] Adpat to the new warm reboot schema Signed-off-by: Jipan Yang --- orchagent/orchdaemon.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index c4f1ee7069..5ff7b0e002 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -379,6 +379,9 @@ void OrchDaemon::start() restored = true; syncd_apply_view(); + // TODO: should be set after port/fdb/arp sync up + WarmStart::setWarmStartState("orchagent", WarmStart::RECONCILED); + /* Pick up those tasks postponed by restore processing */ if(!executorSet.empty()) { From e2d262bdd0720fb31cb1e36ed5a55e72ebc1d235 Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Thu, 9 Aug 2018 14:43:29 -0700 Subject: [PATCH 3/9] Set Executor name at constructor phase Signed-off-by: Jipan Yang --- orchagent/aclorch.cpp | 15 +++++++-------- orchagent/countercheckorch.cpp | 5 ++--- orchagent/crmorch.cpp | 5 ++--- orchagent/fdborch.cpp | 10 ++++------ orchagent/notifier.h | 4 ++-- orchagent/orch.cpp | 8 ++++---- orchagent/orch.h | 19 +++++-------------- orchagent/pfcwdorch.cpp | 10 ++++------ orchagent/portsorch.cpp | 5 ++--- orchagent/timer.h | 4 ++-- 10 files changed, 34 insertions(+), 51 deletions(-) diff --git a/orchagent/aclorch.cpp b/orchagent/aclorch.cpp index 68bf36a148..2589e4507c 100644 --- a/orchagent/aclorch.cpp +++ b/orchagent/aclorch.cpp @@ -1291,7 +1291,7 @@ bool AclRuleDTelFlowWatchListEntry::validateAddAction(string attr_name, string a sai_object_id_t session_oid; if (!m_pDTelOrch || - (attr_name != ACTION_DTEL_FLOW_OP && + (attr_name != ACTION_DTEL_FLOW_OP && attr_name != ACTION_DTEL_INT_SESSION && attr_name != ACTION_DTEL_FLOW_SAMPLE_PERCENT && attr_name != ACTION_DTEL_REPORT_ALL_PACKETS)) @@ -1356,7 +1356,7 @@ bool AclRuleDTelFlowWatchListEntry::validateAddAction(string attr_name, string a value.aclaction.parameter.booldata = (attr_value == DTEL_ENABLED) ? true : false; value.aclaction.enable = (attr_value == DTEL_ENABLED) ? true : false; } - + m_actions[aclDTelActionLookup[attr_name]] = value; return true; @@ -1514,7 +1514,7 @@ bool AclRuleDTelDropWatchListEntry::validateAddAction(string attr_name, string a value.aclaction.parameter.booldata = (attr_value == DTEL_ENABLED) ? true : false; value.aclaction.enable = (attr_value == DTEL_ENABLED) ? true : false; - + m_actions[aclDTelActionLookup[attr_name]] = value; return true; @@ -1693,9 +1693,8 @@ void AclOrch::init(vector& connectors, PortsOrch *portOrch, Mirr // initialized before thread start. auto interv = timespec { .tv_sec = COUNTERS_READ_INTERVAL, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); - auto executor = new ExecutableTimer(timer, this); - executor->setName("ACL_POLL_TIMER"); - Orch::addExecutor("", executor); + auto executor = new ExecutableTimer(timer, this, "ACL_POLL_TIMER"); + Orch::addExecutor(executor); timer->start(); } @@ -2321,7 +2320,7 @@ sai_status_t AclOrch::bindAclTable(sai_object_id_t table_oid, AclTable &aclTable sai_status_t status = SAI_STATUS_SUCCESS; SWSS_LOG_INFO("%s table %s to ports", bind ? "Bind" : "Unbind", aclTable.id.c_str()); - + if (aclTable.ports.empty()) { if (bind) @@ -2432,7 +2431,7 @@ sai_status_t AclOrch::createDTelWatchListTables() SWSS_LOG_INFO("Successfully created ACL table %s, oid: %lX", flowWLTable.description.c_str(), table_oid); /* Create Drop watchlist ACL table */ - + table_attrs.clear(); dropWLTable.id = TABLE_TYPE_DTEL_DROP_WATCHLIST; diff --git a/orchagent/countercheckorch.cpp b/orchagent/countercheckorch.cpp index 9d2cc25f34..aa46758626 100644 --- a/orchagent/countercheckorch.cpp +++ b/orchagent/countercheckorch.cpp @@ -30,9 +30,8 @@ CounterCheckOrch::CounterCheckOrch(DBConnector *db, vector &tableNames): auto interv = timespec { .tv_sec = COUNTER_CHECK_POLL_TIMEOUT_SEC, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); - auto executor = new ExecutableTimer(timer, this); - executor->setName("MC_COUNTERS_POLL"); - Orch::addExecutor("MC_COUNTERS_POLL", executor); + auto executor = new ExecutableTimer(timer, this, "MC_COUNTERS_POLL"); + Orch::addExecutor(executor); timer->start(); } diff --git a/orchagent/crmorch.cpp b/orchagent/crmorch.cpp index b0b2dfdfd2..56f7e5bb38 100644 --- a/orchagent/crmorch.cpp +++ b/orchagent/crmorch.cpp @@ -165,9 +165,8 @@ CrmOrch::CrmOrch(DBConnector *db, string tableName): // The CRM stats needs to be populated again m_countersCrmTable->del(CRM_COUNTERS_TABLE_KEY); - auto executor = new ExecutableTimer(m_timer.get(), this); - executor->setName("CRM_COUNTERS_POLL"); - Orch::addExecutor("CRM_COUNTERS_POLL", executor); + auto executor = new ExecutableTimer(m_timer.get(), this, "CRM_COUNTERS_POLL"); + Orch::addExecutor(executor); m_timer->start(); } diff --git a/orchagent/fdborch.cpp b/orchagent/fdborch.cpp index f01f2cf95e..2b92fb4080 100644 --- a/orchagent/fdborch.cpp +++ b/orchagent/fdborch.cpp @@ -26,16 +26,14 @@ FdbOrch::FdbOrch(DBConnector *db, string tableName, PortsOrch *port) : { m_portsOrch->attach(this); m_flushNotificationsConsumer = new NotificationConsumer(db, "FLUSHFDBREQUEST"); - auto flushNotifier = new Notifier(m_flushNotificationsConsumer, this); - flushNotifier->setName("FLUSHFDBREQUEST"); - Orch::addExecutor("", flushNotifier); + auto flushNotifier = new Notifier(m_flushNotificationsConsumer, this, "FLUSHFDBREQUEST"); + Orch::addExecutor(flushNotifier); /* Add FDB notifications support from ASIC */ DBConnector *notificationsDb = new DBConnector(ASIC_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); m_fdbNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS"); - auto fdbNotifier = new Notifier(m_fdbNotificationConsumer, this); - fdbNotifier->setName("FDB_NOTIFICATIONS"); - Orch::addExecutor("FDB_NOTIFICATIONS", fdbNotifier); + auto fdbNotifier = new Notifier(m_fdbNotificationConsumer, this, "FDB_NOTIFICATIONS"); + Orch::addExecutor(fdbNotifier); addExistingData(tableName); } diff --git a/orchagent/notifier.h b/orchagent/notifier.h index 6113c80527..ca9011f23d 100644 --- a/orchagent/notifier.h +++ b/orchagent/notifier.h @@ -2,8 +2,8 @@ class Notifier : public Executor { public: - Notifier(NotificationConsumer *select, Orch *orch) - : Executor(select, orch) + Notifier(NotificationConsumer *select, Orch *orch, const string &name) + : Executor(select, orch, name) { } diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index 0293155198..87d369e254 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -460,18 +460,18 @@ void Orch::addConsumer(DBConnector *db, string tableName, int pri) { if (db->getDbId() == CONFIG_DB || db->getDbId() == STATE_DB) { - addExecutor(tableName, new Consumer(new SubscriberStateTable(db, tableName, TableConsumable::DEFAULT_POP_BATCH_SIZE, pri), this)); + addExecutor(new Consumer(new SubscriberStateTable(db, tableName, TableConsumable::DEFAULT_POP_BATCH_SIZE, pri), this, tableName)); } else { - addExecutor(tableName, new Consumer(new ConsumerStateTable(db, tableName, gBatchSize, pri), this)); + addExecutor(new Consumer(new ConsumerStateTable(db, tableName, gBatchSize, pri), this, tableName)); } } -void Orch::addExecutor(string executorName, Executor* executor) +void Orch::addExecutor(Executor* executor) { m_consumerMap.emplace(std::piecewise_construct, - std::forward_as_tuple(executorName), + std::forward_as_tuple(executor->getName()), std::forward_as_tuple(executor)); } diff --git a/orchagent/orch.h b/orchagent/orch.h index 156a0fee6e..0e6a3c793c 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -68,9 +68,10 @@ class Orch; class Executor : public Selectable { public: - Executor(Selectable *selectable, Orch *orch) + Executor(Selectable *selectable, Orch *orch, const string &name) : m_selectable(selectable) , m_orch(orch) + , m_name(name) { } @@ -95,10 +96,6 @@ class Executor : public Selectable { return m_name; } - virtual void setName(string name) - { - m_name = name; - } protected: Selectable *m_selectable; @@ -113,8 +110,8 @@ class Executor : public Selectable class Consumer : public Executor { public: - Consumer(ConsumerTableBase *select, Orch *orch) - : Executor(select, orch) + Consumer(ConsumerTableBase *select, Orch *orch, const string &name) + : Executor(select, orch, name) { } @@ -128,12 +125,6 @@ class Consumer : public Executor { return getConsumerTable()->getTableName(); } - - string getName() const - { - return getConsumerTable()->getTableName(); - } - int getDbId() const { return getConsumerTable()->getDbId(); @@ -205,7 +196,7 @@ class Orch ref_resolve_status resolveFieldRefArray(type_map&, const string&, KeyOpFieldsValuesTuple&, vector&); /* Note: consumer will be owned by this class */ - void addExecutor(string executorName, Executor* executor); + void addExecutor(Executor* executor); Executor *getExecutor(string executorName); private: void addConsumer(DBConnector *db, string tableName, int pri = default_orch_pri); diff --git a/orchagent/pfcwdorch.cpp b/orchagent/pfcwdorch.cpp index 8ab271226c..de892e30d4 100644 --- a/orchagent/pfcwdorch.cpp +++ b/orchagent/pfcwdorch.cpp @@ -668,15 +668,13 @@ PfcWdSwOrch::PfcWdSwOrch( auto consumer = new swss::NotificationConsumer( PfcWdSwOrch::getCountersDb().get(), "PFC_WD"); - auto wdNotification = new Notifier(consumer, this); - wdNotification->setName("PFC_WD"); - Orch::addExecutor("PFC_WD", wdNotification); + auto wdNotification = new Notifier(consumer, this, "PFC_WD"); + Orch::addExecutor(wdNotification); auto interv = timespec { .tv_sec = COUNTER_CHECK_POLL_TIMEOUT_SEC, .tv_nsec = 0 }; auto timer = new SelectableTimer(interv); - auto executor = new ExecutableTimer(timer, this); - executor->setName("PFC_WD_COUNTERS_POLL"); - Orch::addExecutor("PFC_WD_COUNTERS_POLL", executor); + auto executor = new ExecutableTimer(timer, this, "PFC_WD_COUNTERS_POLL"); + Orch::addExecutor(executor); timer->start(); } diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 48686cc286..c05c32e369 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -243,9 +243,8 @@ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) /* Add port oper status notification support */ DBConnector *notificationsDb = new DBConnector(ASIC_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); m_portStatusNotificationConsumer = new swss::NotificationConsumer(notificationsDb, "NOTIFICATIONS"); - auto portStatusNotificatier = new Notifier(m_portStatusNotificationConsumer, this); - portStatusNotificatier->setName("PORT_STATUS_NOTIFICATIONS"); - Orch::addExecutor("PORT_STATUS_NOTIFICATIONS", portStatusNotificatier); + auto portStatusNotificatier = new Notifier(m_portStatusNotificationConsumer, this, "PORT_STATUS_NOTIFICATIONS"); + Orch::addExecutor(portStatusNotificatier); // Try warm start bake(); diff --git a/orchagent/timer.h b/orchagent/timer.h index e30a973d2f..732f8f4ebf 100644 --- a/orchagent/timer.h +++ b/orchagent/timer.h @@ -8,8 +8,8 @@ namespace swss { class ExecutableTimer : public Executor { public: - ExecutableTimer(SelectableTimer *timer, Orch *orch) - : Executor(timer, orch) + ExecutableTimer(SelectableTimer *timer, Orch *orch, const string &name) + : Executor(timer, orch, name) { } From e4973fc759609c860623d8bd9ec86a0988dac5da Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Thu, 9 Aug 2018 16:13:16 -0700 Subject: [PATCH 4/9] Use common Consumer::dumpTuple() for Orch::recordTuple() and Orch::dumpToSyncTasks() Signed-off-by: Jipan Yang --- orchagent/orch.cpp | 40 ++++++++++++++++++---------------------- orchagent/orch.h | 5 +++-- orchagent/orchdaemon.cpp | 2 +- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index 87d369e254..1d4327e141 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -171,18 +171,25 @@ void Consumer::drain() m_orch->doTask(*this); } -void Consumer::dumpTasks(vector &ts) +string Consumer::dumpTuple(KeyOpFieldsValuesTuple &tuple) +{ + string s = getTableName() + getConsumerTable()->getTableNameSeparator() + kfvKey(tuple) + + "|" + kfvOp(tuple); + for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) + { + s += "|" + fvField(*i) + ":" + fvValue(*i); + } + + return s; +} + +void Consumer::dumpToSyncTasks(vector &ts) { for (auto &tm :m_toSync) { KeyOpFieldsValuesTuple& tuple = tm.second; - string s = getTableName() + ":" + kfvKey(tuple) - + "|" + kfvOp(tuple); - for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) - { - s += "|" + fvField(*i) + ":" + fvValue(*i); - } + string s = dumpTuple(tuple); ts.push_back(s); } @@ -310,7 +317,7 @@ void Orch::doTask() } } -void Orch::dumpTasks(vector &ts) +void Orch::dumpToSyncTasks(vector &ts) { for(auto &it : m_consumerMap) { @@ -321,7 +328,7 @@ void Orch::dumpTasks(vector &ts) continue; } - consumer->dumpTasks(ts); + consumer->dumpToSyncTasks(ts); } } @@ -346,12 +353,7 @@ void Orch::logfileReopen() void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple) { - string s = consumer.getTableName() + ":" + kfvKey(tuple) - + "|" + kfvOp(tuple); - for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) - { - s += "|" + fvField(*i) + ":" + fvValue(*i); - } + string s = consumer.dumpTuple(tuple); gRecordOfs << getTimestamp() << "|" << s << endl; @@ -365,13 +367,7 @@ void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple) string Orch::dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple) { - string s = consumer.getTableName() + ":" + kfvKey(tuple) - + "|" + kfvOp(tuple); - for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) - { - s += "|" + fvField(*i) + ":" + fvValue(*i); - } - + string s = consumer.dumpTuple(tuple); return s; } diff --git a/orchagent/orch.h b/orchagent/orch.h index 0e6a3c793c..c3ee3d8629 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -130,7 +130,8 @@ class Consumer : public Executor { return getConsumerTable()->getDbId(); } - void dumpTasks(vector &ts); + string dumpTuple(KeyOpFieldsValuesTuple &tuple); + void dumpToSyncTasks(vector &ts); void addToSync(std::deque &entries); void refillToSync(); @@ -184,7 +185,7 @@ class Orch /* TODO: refactor recording */ static void recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple); - void dumpTasks(vector &ts); + void dumpToSyncTasks(vector &ts); protected: ConsumerMap m_consumerMap; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 5ff7b0e002..f79f3959c6 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -405,7 +405,7 @@ void OrchDaemon::getTaskToSync(vector &ts) { for (Orch *o : m_orchList) { - o->dumpTasks(ts); + o->dumpToSyncTasks(ts); } } From f6f2555e9785d0042a3bf6751c85bb1f6079cbd9 Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Fri, 10 Aug 2018 17:07:14 -0700 Subject: [PATCH 5/9] Change the validation of pending task after restore to NOTICE level for now. Signed-off-by: Jipan Yang --- orchagent/orchdaemon.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index f79f3959c6..6265736e47 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -421,18 +421,13 @@ bool OrchDaemon::warmRestoreValidation() getTaskToSync(ts); if (ts.size() != 0) { - // TODO: change it to fatal once staged ProducerStateTable/ConsumerStateTable change and - // pre-warmStart consistency validation are ready. - SWSS_LOG_ERROR("There are pending consumer tasks after restore: "); + // TODO: Update this section accordingly once pre-warmStart consistency validation is ready. + SWSS_LOG_NOTICE("There are pending consumer tasks after restore: "); for(auto &s : ts) { - SWSS_LOG_ERROR("%s", s.c_str()); + SWSS_LOG_NOTICE("%s", s.c_str()); } - return false; - } - else - { - WarmStart::setWarmStartState("orchagent", WarmStart::RESTORED); - return true; } + WarmStart::setWarmStartState("orchagent", WarmStart::RESTORED); + return true; } From e6bdeefc34e5f1bb21dceead2f211e1e6f97c40d Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Sat, 11 Aug 2018 19:11:02 -0700 Subject: [PATCH 6/9] Remove unused function call, and update comment format Signed-off-by: Jipan Yang --- orchagent/fdborch.cpp | 3 ++- orchagent/routeorch.cpp | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/orchagent/fdborch.cpp b/orchagent/fdborch.cpp index 018c9d59eb..369ac212c4 100644 --- a/orchagent/fdborch.cpp +++ b/orchagent/fdborch.cpp @@ -53,7 +53,8 @@ void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_obj return; } - if (m_entries.count(update.entry) != 0) // we already have such entries + // we already have such entries + if (m_entries.count(update.entry) != 0) { SWSS_LOG_INFO("FdbOrch notification: mac %s is already in bv_id 0x%lx", update.entry.mac.to_string().c_str(), entry->bv_id); diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index be49726303..18e8d61769 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -102,11 +102,6 @@ RouteOrch::RouteOrch(DBConnector *db, string tableName, NeighOrch *neighOrch) : m_syncdRoutes[v6_default_ip_prefix] = IpAddresses(); SWSS_LOG_NOTICE("Create IPv6 default route with packet action drop"); - - - // Read the pre-existing data for route in appDB. - addExistingData(tableName); - } bool RouteOrch::hasNextHopGroup(const IpAddresses& ipAddresses) const From a4f5742b3e9211f210a75fde1102eee2933ec2a7 Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Sun, 12 Aug 2018 20:31:32 -0700 Subject: [PATCH 7/9] Move orchagent state restore and sync up out of main select loop. Signed-off-by: Jipan Yang --- orchagent/main.cpp | 3 +- orchagent/orchdaemon.cpp | 126 +++++++++++++++++++-------------------- orchagent/orchdaemon.h | 1 + 3 files changed, 63 insertions(+), 67 deletions(-) diff --git a/orchagent/main.cpp b/orchagent/main.cpp index be4a7e3df2..25fa312a4d 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -100,6 +100,7 @@ int main(int argc, char **argv) swss::Logger::linkToDbNative("orchagent"); SWSS_LOG_ENTER(); + WarmStart::checkWarmStart("orchagent"); if (signal(SIGHUP, sighup_handler) == SIG_ERR) { @@ -269,8 +270,6 @@ int main(int argc, char **argv) DBConnector config_db(CONFIG_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); DBConnector state_db(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); - WarmStart::checkWarmStart("orchagent"); - OrchDaemon *orchDaemon = new OrchDaemon(&appl_db, &config_db, &state_db); if (!orchDaemon->init()) { diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 249371d830..6ac871a9fe 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -284,21 +284,7 @@ void OrchDaemon::start() { SWSS_LOG_ENTER(); - bool restored = true; - // executorSet stores all Executors which have data/task to be processed - // after state restore phase of warm start. - set executorSet; - if (WarmStart::isWarmStart()) - { - restored = false; - WarmStart::setWarmStartState("orchagent", WarmStart::INIT); - } - - // Try warm start - for (Orch *o : m_orchList) - { - o->bake(); - } + warmRestoreAndSyncUp(); for (Orch *o : m_orchList) { @@ -324,17 +310,7 @@ void OrchDaemon::start() } auto *c = (Executor *)s; - - if (restored) - { - c->execute(); - } - else if (executorSet.find(c) == executorSet.end()) - { - executorSet.insert(c); - SWSS_LOG_NOTICE("Task for executor %s is being postponed after state restore", - c->getName().c_str()); - } + c->execute(); /* After each iteration, periodically check all m_toSync map to * execute all the remaining tasks that need to be retried. */ @@ -343,45 +319,6 @@ void OrchDaemon::start() for (Orch *o : m_orchList) o->doTask(); - - /* - * All data to be restored have been added to m_toSync of each orch - * at contructor phase. And the order of m_orchList guranteed the - * dependency of tasks had been met, restore is done. - */ - if (!restored && m_select->isQueueEmpty() && gPortsOrch->isInitDone()) - { - /* - * drain remaining data that are out of order like LAG_MEMBER_TABLE and VLAN_MEMBER_TABLE - * since they were checked before LAG_TABLE and VLAN_TABLE. - */ - for (Orch *o : m_orchList) - { - o->doTask(); - } - - warmRestoreValidation(); - SWSS_LOG_NOTICE("Orchagent state restore done"); - restored = true; - syncd_apply_view(); - - // TODO: should be set after port/fdb/arp sync up - WarmStart::setWarmStartState("orchagent", WarmStart::RECONCILED); - - /* Pick up those tasks postponed by restore processing */ - if(!executorSet.empty()) - { - for (Executor *c : executorSet) - { - c->execute(); - } - for (Orch *o : m_orchList) - { - o->doTask(); - } - } - } - /* Let sairedis to flush all SAI function call to ASIC DB. * Normally the redis pipeline will flush when enough request * accumulated. Still it is possible that small amount of @@ -392,6 +329,65 @@ void OrchDaemon::start() } } +/* + * Try to perform orchagent state restore and dynamic states sync up if + * warm start reqeust is detected. + */ +void OrchDaemon::warmRestoreAndSyncUp() +{ + if (!WarmStart::isWarmStart()) + { + return; + } + + WarmStart::setWarmStartState("orchagent", WarmStart::INIT); + + for (Orch *o : m_orchList) + { + o->bake(); + } + + /* + * First iteration is to handle all the existing data in predefined order. + */ + for (Orch *o : m_orchList) + { + o->doTask(); + } + /* + * Drain remaining data that are out of order like LAG_MEMBER_TABLE and VLAN_MEMBER_TABLE + * since they were checked before LAG_TABLE and VLAN_TABLE. + */ + for (Orch *o : m_orchList) + { + o->doTask(); + } + + // One more iteration due to the VLAN lag empty member restrictioin temporary fix, to be removed. + for (Orch *o : m_orchList) + { + o->doTask(); + } + + /* + * At this point, all the pre-existing data should be have been processed properly, and + * orchagent should be in exact same state of pre-shutdown. + * Perform restore validation as needed. + */ + warmRestoreValidation(); + + SWSS_LOG_NOTICE("Orchagent state restore done"); + syncd_apply_view(); + + /* TODO: perform port and fdb state sync up*/ + + /* + * Note. Arp sync up is handled in neighsyncd. + * The "RECONCILED" state of orchagent doesn't mean the state related to neighbor is up to date. + */ + WarmStart::setWarmStartState("orchagent", WarmStart::RECONCILED); +} + /* * Get tasks to sync for consumers of each orch being managed by this orch daemon */ diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index db17c07b8e..53210b7477 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -34,6 +34,7 @@ class OrchDaemon bool init(); void start(); + void warmRestoreAndSyncUp(); void getTaskToSync(vector &ts); bool warmRestoreValidation(); private: From 8f1700ee84c5506427509b7d2edf4ce36eab980d Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Sun, 12 Aug 2018 20:36:18 -0700 Subject: [PATCH 8/9] Remove the extra round of data draining processing which is not needed. Signed-off-by: Jipan Yang --- orchagent/orchdaemon.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 6ac871a9fe..94e855bd01 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -363,14 +363,8 @@ void OrchDaemon::warmRestoreAndSyncUp() o->doTask(); } - // One more iteration due to the VLAN lag empty member restrictioin temporary fix, to be removed. - for (Orch *o : m_orchList) - { - o->doTask(); - } - /* - * At this point, all the pre-existing data should be have been processed properly, and + * At this point, all the pre-existing data should have been processed properly, and * orchagent should be in exact same state of pre-shutdown. * Perform restore validation as needed. */ From 4324763121c8fa960eeb955c6a5ed90e5d963205 Mon Sep 17 00:00:00 2001 From: Jipan Yang Date: Mon, 13 Aug 2018 20:15:36 -0700 Subject: [PATCH 9/9] Address review comments Signed-off-by: Jipan Yang --- orchagent/fdborch.cpp | 2 +- orchagent/intfsorch.cpp | 12 +----------- orchagent/main.cpp | 2 +- orchagent/orch.cpp | 6 +++--- orchagent/orch.h | 6 +++--- orchagent/orchdaemon.cpp | 2 +- 6 files changed, 10 insertions(+), 20 deletions(-) diff --git a/orchagent/fdborch.cpp b/orchagent/fdborch.cpp index 369ac212c4..f5ee04ce21 100644 --- a/orchagent/fdborch.cpp +++ b/orchagent/fdborch.cpp @@ -54,7 +54,7 @@ void FdbOrch::update(sai_fdb_event_t type, const sai_fdb_entry_t* entry, sai_obj } // we already have such entries - if (m_entries.count(update.entry) != 0) + if (m_entries.find(update.entry) != m_entries.end()) { SWSS_LOG_INFO("FdbOrch notification: mac %s is already in bv_id 0x%lx", update.entry.mac.to_string().c_str(), entry->bv_id); diff --git a/orchagent/intfsorch.cpp b/orchagent/intfsorch.cpp index 3b78e28bb2..a49b52ec4e 100644 --- a/orchagent/intfsorch.cpp +++ b/orchagent/intfsorch.cpp @@ -86,16 +86,7 @@ void IntfsOrch::doTask(Consumer &consumer) { if (alias == "lo") { - // set request for lo may come after warm start restore - auto it_intfs = m_syncdIntfses.find(alias); - if (it_intfs == m_syncdIntfses.end()) - { - IntfsEntry intfs_entry; - intfs_entry.ref_count = 0; - m_syncdIntfses[alias] = intfs_entry; - addIp2MeRoute(ip_prefix); - } - + addIp2MeRoute(ip_prefix); it = consumer.m_toSync.erase(it); continue; } @@ -180,7 +171,6 @@ void IntfsOrch::doTask(Consumer &consumer) { if (alias == "lo") { - m_syncdIntfses.erase(alias); removeIp2MeRoute(ip_prefix); it = consumer.m_toSync.erase(it); continue; diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 25fa312a4d..3f5ec2bb83 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -22,7 +22,7 @@ extern "C" { #include "saihelper.h" #include "notifications.h" #include -#include +#include "warm_restart.h" using namespace std; using namespace swss; diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index 6b6bb4d62d..6c5f51c6f6 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -197,7 +197,7 @@ string Consumer::dumpTuple(KeyOpFieldsValuesTuple &tuple) return s; } -void Consumer::dumpToSyncTasks(vector &ts) +void Consumer::dumpPendingTasks(vector &ts) { for (auto &tm :m_toSync) { @@ -350,7 +350,7 @@ void Orch::doTask() } } -void Orch::dumpToSyncTasks(vector &ts) +void Orch::dumpPendingTasks(vector &ts) { for(auto &it : m_consumerMap) { @@ -361,7 +361,7 @@ void Orch::dumpToSyncTasks(vector &ts) continue; } - consumer->dumpToSyncTasks(ts); + consumer->dumpPendingTasks(ts); } } diff --git a/orchagent/orch.h b/orchagent/orch.h index fba0f42483..48239af823 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -131,11 +131,11 @@ class Consumer : public Executor { } string dumpTuple(KeyOpFieldsValuesTuple &tuple); - void dumpToSyncTasks(vector &ts); - void execute(); + void dumpPendingTasks(vector &ts); size_t refillToSync(); size_t refillToSync(Table* table); + void execute(); void drain(); /* Store the latest 'golden' status */ @@ -191,7 +191,7 @@ class Orch /* TODO: refactor recording */ static void recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple); - void dumpToSyncTasks(vector &ts); + void dumpPendingTasks(vector &ts); protected: ConsumerMap m_consumerMap; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 94e855bd01..bd5e04536d 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -389,7 +389,7 @@ void OrchDaemon::getTaskToSync(vector &ts) { for (Orch *o : m_orchList) { - o->dumpToSyncTasks(ts); + o->dumpPendingTasks(ts); } }