Skip to content

Commit

Permalink
Warm reboot: port state sync up (sonic-net#557)
Browse files Browse the repository at this point in the history
* Warm start: port state sync up
* Use Table::hget() to simplify the oper_status retrieval processing.
* Add more comment for port state sync up
* Use m_oper_status field of Port class instead of reading port oper status rom appDB.
* Add common function for port oper status update
* Throw execption upon port oper status get error
* Add VS test for port state sync up
  • Loading branch information
jipanyang authored and qiluo-msft committed Sep 14, 2018
1 parent bf38bec commit 41e61bd
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 5 deletions.
9 changes: 8 additions & 1 deletion orchagent/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,14 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}

syncd_apply_view();
/*
* In syncd view comparison solution, apply view has been sent
* immediately after restore is done
*/
if (!WarmStart::isWarmStart())
{
syncd_apply_view();
}

orchDaemon->start();
}
Expand Down
7 changes: 6 additions & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ using namespace swss;

extern sai_switch_api_t* sai_switch_api;
extern sai_object_id_t gSwitchId;

extern void syncd_apply_view();
/*
* Global orch daemon variables
*/
Expand Down Expand Up @@ -387,7 +389,10 @@ bool OrchDaemon::warmRestoreAndSyncUp()

SWSS_LOG_NOTICE("Orchagent state restore done");

/* TODO: perform port and fdb state sync up*/
syncd_apply_view();

/* Start dynamic state sync up */
gPortsOrch->refreshPortStatus();

/*
* Note. Arp sync up is handled in neighsyncd.
Expand Down
58 changes: 56 additions & 2 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2866,10 +2866,64 @@ void PortsOrch::doTask(NotificationConsumer &consumer)

SWSS_LOG_NOTICE("Get port state change notification id:%lx status:%d", id, status);

this->updateDbPortOperStatus(id, status);
this->setHostIntfsOperStatus(id, status == SAI_PORT_OPER_STATUS_UP);
Port p;
if (!getPort(id, p))
{
SWSS_LOG_ERROR("Failed to get port object for port id 0x%lx", id);
continue;
}
updatePortOperStatus(p, status);
}

sai_deserialize_free_port_oper_status_ntf(count, portoperstatus);
}
}

void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status)
{
if (status != port.m_oper_status)
{
SWSS_LOG_NOTICE("Port state changed for %s from %s to %s", port.m_alias.c_str(),
oper_status_strings.at(port.m_oper_status).c_str(), oper_status_strings.at(status).c_str());
this->updateDbPortOperStatus(port.m_port_id, status);
if(status == SAI_PORT_OPER_STATUS_UP || port.m_oper_status == SAI_PORT_OPER_STATUS_UP)
{
this->setHostIntfsOperStatus(port.m_port_id, status == SAI_PORT_OPER_STATUS_UP);
}
}
}
/*
* sync up orchagent with libsai/ASIC for port state.
*
* Currently NotificationProducer is used by syncd to inform port state change,
* which means orchagent will miss the signal if it happens between orchagent shutdown and startup.
* Syncd doesn't know whether the signal has been lost or not.
* Also the source of notification event is from libsai/SDK.
*
* Latest oper status for each port is retrieved via SAI_PORT_ATTR_OPER_STATUS sai API,
* the hostif and db are updated accordingly.
*/
void PortsOrch::refreshPortStatus()
{
SWSS_LOG_ENTER();

for (auto &it: m_portList)
{
auto &p = it.second;
if (p.m_type == Port::PHY)
{
sai_attribute_t attr;
attr.id = SAI_PORT_ATTR_OPER_STATUS;

sai_status_t ret = sai_port_api->get_port_attribute(p.m_port_id, 1, &attr);
if (ret != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get oper status for %s", p.m_alias.c_str());
throw "PortsOrch get port oper status failure";
}
sai_port_oper_status_t status = (sai_port_oper_status_t)attr.value.u32;
SWSS_LOG_INFO("%s oper status is %s", p.m_alias.c_str(), oper_status_strings.at(status).c_str());
updatePortOperStatus(p, status);
}
}
}
4 changes: 3 additions & 1 deletion orchagent/portsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class PortsOrch : public Orch, public Subject
bool setPortPfc(sai_object_id_t portId, uint8_t pfc_bitmask);

void generateQueueMap();

void refreshPortStatus();
private:
unique_ptr<Table> m_counterTable;
unique_ptr<Table> m_portTable;
Expand Down Expand Up @@ -167,6 +167,8 @@ class PortsOrch : public Orch, public Subject

bool setPortAutoNeg(sai_object_id_t id, int an);
bool setPortFecMode(sai_object_id_t id, int fec);

void updatePortOperStatus(Port &port, sai_port_oper_status_t status);
};
#endif /* SWSS_PORTSORCH_H */

94 changes: 94 additions & 0 deletions tests/test_warm_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@
import time
import json

# start processes in SWSS
def start_swss(dvs):
dvs.runcmd(['sh', '-c', 'supervisorctl start orchagent; supervisorctl start portsyncd; supervisorctl start intfsyncd; \
supervisorctl start neighsyncd; supervisorctl start intfmgrd; supervisorctl start vlanmgrd; \
supervisorctl start buffermgrd; supervisorctl start arp_update'])

# stop processes in SWSS
def stop_swss(dvs):
dvs.runcmd(['sh', '-c', 'supervisorctl stop orchagent; supervisorctl stop portsyncd; supervisorctl stop intfsyncd; \
supervisorctl stop neighsyncd; supervisorctl stop intfmgrd; supervisorctl stop vlanmgrd; \
supervisorctl stop buffermgrd; supervisorctl stop arp_update'])


# Get restart count of all processes supporting warm restart
def swss_get_RestartCount(state_db):
restart_count = {}
Expand Down Expand Up @@ -683,3 +696,84 @@ def test_swss_neighbor_syncup(dvs):
check_sairedis_for_neighbor_entry(dvs, 4, 4, 4)
# check restart Count
swss_app_check_RestartCount_single(state_db, restart_count, "neighsyncd")

def test_swss_port_state_syncup(dvs):

appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0)
state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0)

# enable warm restart
# TODO: use cfg command to config it
create_entry_tbl(
conf_db,
swsscommon.CFG_WARM_RESTART_TABLE_NAME, "swss",
[
("enable", "true"),
]
)

tbl = swsscommon.Table(appl_db, swsscommon.APP_PORT_TABLE_NAME)

restart_count = swss_get_RestartCount(state_db)

# update port admin state
dvs.runcmd("ifconfig Ethernet0 10.0.0.0/31 up")
dvs.runcmd("ifconfig Ethernet4 10.0.0.2/31 up")
dvs.runcmd("ifconfig Ethernet8 10.0.0.4/31 up")

dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01")
dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02")
dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03")

dvs.servers[0].runcmd("ip link set down dev eth0") == 0
dvs.servers[1].runcmd("ip link set down dev eth0") == 0
dvs.servers[2].runcmd("ip link set down dev eth0") == 0

dvs.servers[2].runcmd("ip link set up dev eth0") == 0

time.sleep(3)

for i in [0, 1, 2]:
(status, fvs) = tbl.get("Ethernet%d" % (i * 4))
assert status == True
oper_status = "unknown"
for v in fvs:
if v[0] == "oper_status":
oper_status = v[1]
break
if i == 2:
assert oper_status == "up"
else:
assert oper_status == "down"

stop_swss(dvs)
time.sleep(3)

# flap the port oper status for Ethernet0, Ethernet4 and Ethernet8
dvs.servers[0].runcmd("ip link set down dev eth0") == 0
dvs.servers[1].runcmd("ip link set down dev eth0") == 0
dvs.servers[2].runcmd("ip link set down dev eth0") == 0

dvs.servers[0].runcmd("ip link set up dev eth0") == 0
dvs.servers[1].runcmd("ip link set up dev eth0") == 0

time.sleep(5)
start_swss(dvs)
time.sleep(10)

swss_check_RestartCount(state_db, restart_count)

for i in [0, 1, 2]:
(status, fvs) = tbl.get("Ethernet%d" % (i * 4))
assert status == True
oper_status = "unknown"
for v in fvs:
if v[0] == "oper_status":
oper_status = v[1]
break
if i == 2:
assert oper_status == "down"
else:
assert oper_status == "up"

0 comments on commit 41e61bd

Please sign in to comment.