From 48d095c6a09237490cfd28caf0c2aae797eb6c2f Mon Sep 17 00:00:00 2001 From: Vivek Date: Mon, 12 Sep 2022 16:46:37 -0700 Subject: [PATCH] [portmgr] Fixed the orchagent crash due to late arrival of notif (#2431) Signed-off-by: Vivek Reddy Karri Bulk write to APP_DB i.e. alias, lanes, speed must be read through one notification by orchagent during create_port Handled a race condition in portmgrd which tries to immediately apply a mtu/admin_status SET notif after a DEL causing it to crash --- cfgmgr/portmgr.cpp | 60 ++++++++++++++++++++++++++++++++--------- cfgmgr/portmgr.h | 1 + orchagent/portsorch.cpp | 9 +++++++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/cfgmgr/portmgr.cpp b/cfgmgr/portmgr.cpp index 38c0418a7a..5134b31861 100644 --- a/cfgmgr/portmgr.cpp +++ b/cfgmgr/portmgr.cpp @@ -23,27 +23,55 @@ PortMgr::PortMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, c bool PortMgr::setPortMtu(const string &alias, const string &mtu) { stringstream cmd; - string res; + string res, cmd_str; // ip link set dev mtu cmd << IP_CMD << " link set dev " << shellquote(alias) << " mtu " << shellquote(mtu); - EXEC_WITH_ERROR_THROW(cmd.str(), res); - - // Set the port MTU in application database to update both - // the port MTU and possibly the port based router interface MTU - return writeConfigToAppDb(alias, "mtu", mtu); + cmd_str = cmd.str(); + int ret = swss::exec(cmd_str, res); + if (!ret) + { + // Set the port MTU in application database to update both + // the port MTU and possibly the port based router interface MTU + return writeConfigToAppDb(alias, "mtu", mtu); + } + else if (!isPortStateOk(alias)) + { + // Can happen when a DEL notification is sent by portmgrd immediately followed by a new SET notif + SWSS_LOG_WARN("Setting mtu to alias:%s netdev failed with cmd:%s, rc:%d, error:%s", alias.c_str(), cmd_str.c_str(), ret, res.c_str()); + return false; + } + else + { + throw runtime_error(cmd_str + " : " + res); + } + return true; } bool PortMgr::setPortAdminStatus(const string &alias, const bool up) { stringstream cmd; - string res; + string res, cmd_str; // ip link set dev [up|down] cmd << IP_CMD << " link set dev " << shellquote(alias) << (up ? " up" : " down"); - EXEC_WITH_ERROR_THROW(cmd.str(), res); - - return writeConfigToAppDb(alias, "admin_status", (up ? "up" : "down")); + cmd_str = cmd.str(); + int ret = swss::exec(cmd_str, res); + if (!ret) + { + return writeConfigToAppDb(alias, "admin_status", (up ? "up" : "down")); + } + else if (!isPortStateOk(alias)) + { + // Can happen when a DEL notification is sent by portmgrd immediately followed by a new SET notification + SWSS_LOG_WARN("Setting admin_status to alias:%s netdev failed with cmd%s, rc:%d, error:%s", alias.c_str(), cmd_str.c_str(), ret, res.c_str()); + return false; + } + else + { + throw runtime_error(cmd_str + " : " + res); + } + return true; } bool PortMgr::isPortStateOk(const string &alias) @@ -124,10 +152,9 @@ void PortMgr::doTask(Consumer &consumer) } } - for (auto &entry : field_values) + if (field_values.size()) { - writeConfigToAppDb(alias, fvField(entry), fvValue(entry)); - SWSS_LOG_NOTICE("Configure %s %s to %s", alias.c_str(), fvField(entry).c_str(), fvValue(entry).c_str()); + writeConfigToAppDb(alias, field_values); } if (!portOk) @@ -136,6 +163,7 @@ void PortMgr::doTask(Consumer &consumer) writeConfigToAppDb(alias, "mtu", mtu); writeConfigToAppDb(alias, "admin_status", admin_status); + /* Retry setting these params after the netdev is created */ field_values.clear(); field_values.emplace_back("mtu", mtu); field_values.emplace_back("admin_status", admin_status); @@ -176,3 +204,9 @@ bool PortMgr::writeConfigToAppDb(const std::string &alias, const std::string &fi return true; } + +bool PortMgr::writeConfigToAppDb(const std::string &alias, std::vector &field_values) +{ + m_appPortTable.set(alias, field_values); + return true; +} diff --git a/cfgmgr/portmgr.h b/cfgmgr/portmgr.h index dde346bfe1..683aacc488 100644 --- a/cfgmgr/portmgr.h +++ b/cfgmgr/portmgr.h @@ -30,6 +30,7 @@ class PortMgr : public Orch void doTask(Consumer &consumer); bool writeConfigToAppDb(const std::string &alias, const std::string &field, const std::string &value); + bool writeConfigToAppDb(const std::string &alias, std::vector &field_values); bool setPortMtu(const std::string &alias, const std::string &mtu); bool setPortAdminStatus(const std::string &alias, const bool up); bool isPortStateOk(const std::string &alias); diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 26219d4d35..53bbcf32fe 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -2571,6 +2571,15 @@ bool PortsOrch::addPort(const set &lane_set, uint32_t speed, int an, string { SWSS_LOG_ENTER(); + if (!speed || lane_set.empty()) + { + /* + speed and lane list are mandatory attributes for the initial create_port call + This check is required because the incoming notifs may not be atomic + */ + return true; + } + vector lanes(lane_set.begin(), lane_set.end()); sai_attribute_t attr;