From afdcf34a4e0b73b455be375cb5075961393c32c7 Mon Sep 17 00:00:00 2001 From: zhenggen-xu Date: Mon, 12 Nov 2018 07:51:59 -0800 Subject: [PATCH] Support neighsyncd system warmreboot. (#661) * Support neighsyncd system warmreboot. neighsyncd will wait for kernel restore process to be done before reconciliation Add vs testcases to cover kernel neighbor table restore process and neighsyncd process upon system warm reboot Signed-off-by: Zhenggen Xu * Add the neigh_restore table to swss-schema.md Make the state check function more accurate. Signed-off-by: Zhenggen Xu * Add restore_neighbors.py to be part of swss deb pkg: In case system warm reboot is enabled, it will try to restore the neighbor table from appDB into kernel through netlink API calls and update the neighbor table by sending arp/ns requests to all neighbor entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process. Added timeout in neighsyncd when waiting for restore_neighbors to finish Updated vs testcases Signed-off-by: Zhenggen Xu * Use chrono::steady_clock in neighsyncd for time check Use monotonic lib for python time check Update the warmrestart python binding lib and re-enabled restore cnt check in vs tests Signed-off-by: Zhenggen Xu * Use table hget to simplify the code Time-out value changes vs test case changes to support default host side neigh table settings. 
Signed-off-by: Zhenggen Xu * Fix vs test cases after merge Signed-off-by: Zhenggen Xu --- debian/swss.install | 1 + doc/swss-schema.md | 5 + neighsyncd/neighsync.cpp | 17 +- neighsyncd/neighsync.h | 10 +- neighsyncd/neighsyncd.cpp | 34 ++- neighsyncd/restore_neighbors.py | 245 +++++++++++++++++++ tests/conftest.py | 3 + tests/test_warm_reboot.py | 420 ++++++++++++++++++++++++++++---- 8 files changed, 677 insertions(+), 58 deletions(-) create mode 100755 neighsyncd/restore_neighbors.py diff --git a/debian/swss.install b/debian/swss.install index e04f63269be3..d61bf3faa78a 100644 --- a/debian/swss.install +++ b/debian/swss.install @@ -6,3 +6,4 @@ swssconfig/sample/th.64ports.buffers.json etc/swss/config.d swssconfig/sample/th.64ports.qos.json etc/swss/config.d swssconfig/sample/th2.118ports.buffers.json etc/swss/config.d swssconfig/sample/th2.118ports.qos.json etc/swss/config.d +neighsyncd/restore_neighbors.py usr/bin diff --git a/doc/swss-schema.md b/doc/swss-schema.md index 9d02908e766f..4cf3681c3763 100644 --- a/doc/swss-schema.md +++ b/doc/swss-schema.md @@ -745,6 +745,11 @@ Stores information for physical switch ports managed by the switch chip. Ports t ; dynanic data like port state, neighbor, routes ; and so on. +### NEIGH_RESTORE_TABLE + ;State for neighbor table restoring process during warm reboot + key = NEIGH_RESTORE_TABLE|Flags + restored = "true" / "false" ; restored state + ## Configuration files What configuration files should we have? Do apps, orch agent each need separate files? 
diff --git a/neighsyncd/neighsync.cpp b/neighsyncd/neighsync.cpp index d0e36cc5fffe..f441208900d8 100644 --- a/neighsyncd/neighsync.cpp +++ b/neighsyncd/neighsync.cpp @@ -16,12 +16,27 @@ using namespace std; using namespace swss; -NeighSync::NeighSync(RedisPipeline *pipelineAppDB) : +NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) : m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME), + m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME), m_AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", &m_neighTable, DEFAULT_NEIGHSYNC_WARMSTART_TIMER) { } +// Check if neighbor table is restored in kernel +bool NeighSync::isNeighRestoreDone() +{ + string value; + + m_stateNeighRestoreTable.hget("Flags", "restored", value); + if (value == "true") + { + SWSS_LOG_NOTICE("neighbor table restore to kernel is done"); + return true; + } + return false; +} + void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj) { char ipStr[MAX_ADDR_SIZE + 1] = {0}; diff --git a/neighsyncd/neighsync.h b/neighsyncd/neighsync.h index 1889e0cadabb..aefe9bdeb7c6 100644 --- a/neighsyncd/neighsync.h +++ b/neighsyncd/neighsync.h @@ -8,6 +8,11 @@ #define DEFAULT_NEIGHSYNC_WARMSTART_TIMER 5 +//This is the timer value (in seconds) that the neighsyncd waiting for restore_neighbors +//service to finish, should be longer than the restore_neighbors timeout value (60) +//This should not happen, if happens, system is in a unknown state, we should exit. 
+#define RESTORE_NEIGH_WAIT_TIME_OUT 70 + namespace swss { class NeighSync : public NetMsg @@ -15,16 +20,19 @@ class NeighSync : public NetMsg public: enum { MAX_ADDR_SIZE = 64 }; - NeighSync(RedisPipeline *pipelineAppDB); + NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb); virtual void onMsg(int nlmsg_type, struct nl_object *obj); + bool isNeighRestoreDone(); + AppRestartAssist *getRestartAssist() { return &m_AppRestartAssist; } private: + Table m_stateNeighRestoreTable; ProducerStateTable m_neighTable; AppRestartAssist m_AppRestartAssist; }; diff --git a/neighsyncd/neighsyncd.cpp b/neighsyncd/neighsyncd.cpp index 07236a8515e3..cce6487ca463 100644 --- a/neighsyncd/neighsyncd.cpp +++ b/neighsyncd/neighsyncd.cpp @@ -1,4 +1,7 @@ #include +#include +#include +#include #include "logger.h" #include "select.h" #include "netdispatcher.h" @@ -14,8 +17,9 @@ int main(int argc, char **argv) DBConnector appDb(APPL_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); RedisPipeline pipelineAppDB(&appDb); + DBConnector stateDb(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0); - NeighSync sync(&pipelineAppDB); + NeighSync sync(&pipelineAppDB, &stateDb); NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync); NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync); @@ -27,16 +31,36 @@ int main(int argc, char **argv) NetLink netlink; Select s; - netlink.registerGroup(RTNLGRP_NEIGH); - cout << "Listens to neigh messages..." 
<< endl; - netlink.dumpRequest(RTM_GETNEIGH); + using namespace std::chrono; - s.addSelectable(&netlink); if (sync.getRestartAssist()->isWarmStartInProgress()) { sync.getRestartAssist()->readTableToMap(); + + steady_clock::time_point starttime = steady_clock::now(); + while (!sync.isNeighRestoreDone()) + { + duration time_span = + duration_cast>(steady_clock::now() - starttime); + int pasttime = int(time_span.count()); + SWSS_LOG_INFO("waited neighbor table to be restored to kernel" + " for %d seconds", pasttime); + if (pasttime > RESTORE_NEIGH_WAIT_TIME_OUT) + { + SWSS_LOG_ERROR("neighbor table restore is not finished" + " after timed-out, exit!!!"); + exit(EXIT_FAILURE); + } + sleep(1); + } sync.getRestartAssist()->startReconcileTimer(s); } + + netlink.registerGroup(RTNLGRP_NEIGH); + cout << "Listens to neigh messages..." << endl; + netlink.dumpRequest(RTM_GETNEIGH); + + s.addSelectable(&netlink); while (true) { Selectable *temps; diff --git a/neighsyncd/restore_neighbors.py b/neighsyncd/restore_neighbors.py new file mode 100755 index 000000000000..ceafa2ecc729 --- /dev/null +++ b/neighsyncd/restore_neighbors.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python + +"""" +Description: restore_neighbors.py -- restoring neighbor table into kernel during system warm reboot. + The script is started by supervisord in swss docker when the docker is started. + If does not do anything in case warm restart is not enabled. + In case system warm reboot is enabled, it will try to restore the neighbor table into kernel + through netlink API calls and update the neigh table by sending arp/ns requests to all neighbor + entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process. + In case docker restart enabled only, it sets the stateDB flag so neighsyncd can follow + the same logic. 
+""" + +import sys +import swsssdk +import netifaces +import time +import monotonic +from pyroute2 import IPRoute, NetlinkError +from pyroute2.netlink.rtnl import ndmsg +from socket import AF_INET,AF_INET6 +import logging +logging.getLogger("scapy.runtime").setLevel(logging.ERROR) +from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr +from swsscommon import swsscommon +import errno + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) +logger.addHandler(logging.NullHandler()) + +# timeout the restore process in 1 min if not finished +# This is mostly to wait for interfaces to be created and up after warm-reboot +# It would be good to keep that below routing reconciliation time-out. +TIME_OUT = 60 + +# every 5 seconds to check interfaces state +CHECK_INTERVAL = 5 + +ip_family = {"IPv4": AF_INET, "IPv6": AF_INET6} + +# return the first ipv4/ipv6 address assigned on intf +def first_ip_on_intf(intf, family): + if intf in netifaces.interfaces(): + ipaddresses = netifaces.ifaddresses(intf) + if ip_family[family] in ipaddresses: + # cover link local address as well + return ipaddresses[ip_family[family]][0]['addr'].split("%")[0] + return None + +# check if the intf is operational up +def is_intf_oper_state_up(intf): + oper_file = '/sys/class/net/{0}/carrier' + try: + state_file = open(oper_file.format(intf), 'r') + state = state_file.readline().rstrip() + except Exception as e: + logger.info('Error: {}'.format(str(e))) + return False + if state == '1': + return True + return False + +# read the neigh table from AppDB to memory, format as below +# build map as below, this can efficiently access intf and family groups later +# { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] } +# { family2 -> [[ipM, macM], [ipN, macN] ...] } }, +# ... +# intfA -> { { family1 -> [[ipW, macW], [ipX, macX] ...] } +# { family2 -> [[ipY, macY], [ipZ, macZ] ...] 
} } +# } +# +# Alternatively: +# 1, we can build: +# { intf1 -> [[family1, ip1, mac1], [family2, ip2, mac2] ...]}, +# ... +# { intfA -> [[family1, ipX, macX], [family2, ipY, macY] ...]} +# +# 2, Or simply build two maps based on families +# These alternative solutions would have worse performance because: +# 1, need iterate the whole list if only one family is up. +# 2, need check interface state twice due to the split map + +def read_neigh_table_to_maps(): + db = swsssdk.SonicV2Connector(host='127.0.0.1') + db.connect(db.APPL_DB, False) + + intf_neigh_map = {} + + keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*') + keys = [] if keys is None else keys + for key in keys: + key_split = key.split(':', 2) + intf_name = key_split[1] + if intf_name == 'lo': + continue + dst_ip = key_split[2] + value = db.get_all(db.APPL_DB, key) + if 'neigh' in value and 'family' in value: + dmac = value['neigh'] + family = value['family'] + else: + raise RuntimeError('Neigh table format is incorrect') + + if family not in ip_family: + raise RuntimeError('Neigh table format is incorrect') + + ip_mac_pair = [] + ip_mac_pair.append(dst_ip) + ip_mac_pair.append(dmac) + + intf_neigh_map.setdefault(intf_name, {}).setdefault(family, []).append(ip_mac_pair) + db.close(db.APPL_DB) + return intf_neigh_map + + +# Use netlink to set neigh table into kernel, not overwrite the existing ones +def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac): + logging.info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format( + family, intf_idx, dst_ip, dmac)) + + if family not in ip_family: + return + + family_af_inet = ip_family[family] + try : + ipclass.neigh('add', + family=family_af_inet, + dst=dst_ip, + lladdr=dmac, + ifindex=intf_idx, + state=ndmsg.states['reachable']) + # If neigh exists, log it but no exception raise, other exceptions, raise + except NetlinkError as e: + if e[0] == errno.EEXIST: + logger.warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: 
{}'.format( + family, intf_idx, dst_ip, dmac)) + else: + raise + +# build ARP or NS packets depending on family +def build_arp_ns_pkt(family, smac, src_ip, dst_ip): + if family == 'IPv4': + eth = Ether(src=smac, dst='ff:ff:ff:ff:ff:ff') + pkt = eth/ARP(op=ARP.who_has, pdst=dst_ip) + elif family == 'IPv6': + nsma = in6_getnsma(inet_pton(AF_INET6, dst_ip)) + mcast_dst_ip = inet_ntop(AF_INET6, nsma) + dmac = in6_getnsmac(nsma) + eth = Ether(src=smac,dst=dmac) + ipv6 = IPv6(src=src_ip, dst=mcast_dst_ip) + ns = ICMPv6ND_NS(tgt=dst_ip) + ns_opt = ICMPv6NDOptSrcLLAddr(lladdr=smac) + pkt = eth/ipv6/ns/ns_opt + return pkt + +# Set the statedb "NEIGH_RESTORE_TABLE|Flags", so neighsyncd can start reconciliation +def set_statedb_neigh_restore_done(): + db = swsssdk.SonicV2Connector(host='127.0.0.1') + db.connect(db.STATE_DB, False) + db.set(db.STATE_DB, 'NEIGH_RESTORE_TABLE|Flags', 'restored', 'true') + db.close(db.STATE_DB) + return + +def restore_update_kernel_neighbors(intf_neigh_map): + # create object for netlink calls to kernel + ipclass = IPRoute() + mtime = monotonic.time.time + start_time = mtime() + while (mtime() - start_time) < TIME_OUT: + for intf, family_neigh_map in intf_neigh_map.items(): + # only try to restore to kernel when link is up + if is_intf_oper_state_up(intf): + src_mac = get_if_hwaddr(intf) + intf_idx = ipclass.link_lookup(ifname=intf)[0] + # create socket per intf to send packets + s = conf.L2socket(iface=intf) + + # Only two families: 'IPv4' and 'IPv6' + for family in ip_family.keys(): + # if ip address assigned and if we have neighs in this family, restore them + src_ip = first_ip_on_intf(intf, family) + if src_ip and (family in family_neigh_map): + neigh_list = family_neigh_map[family] + for dst_ip, dmac in neigh_list: + # use netlink to set neighbor entries + set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac) + + # best effort to update kernel neigh info + # this will be updated by arp_update later too + 
s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip)) + # delete this family on the intf + del intf_neigh_map[intf][family] + # close the pkt socket + s.close() + + # if all families are deleted, remove the key + if len(intf_neigh_map[intf]) == 0: + del intf_neigh_map[intf] + # map is empty, all neigh entries are restored + if not intf_neigh_map: + break + time.sleep(CHECK_INTERVAL) + + +def main(): + + print "restore_neighbors service is started" + + # Use warmstart python binding + warmstart = swsscommon.WarmStart() + warmstart.initialize("neighsyncd", "swss") + warmstart.checkWarmStart("neighsyncd", "swss", False) + + # if swss or system warm reboot not enabled, don't run + if not warmstart.isWarmStart(): + print "restore_neighbors service is skipped as warm restart not enabled" + return + + # swss restart not system warm reboot + if not warmstart.isSystemWarmRebootEnabled(): + set_statedb_neigh_restore_done() + print "restore_neighbors service is done as system warm reboot not enabled" + return + + # read the neigh table from appDB to internal map + try: + intf_neigh_map = read_neigh_table_to_maps() + except RuntimeError as e: + logger.exception(str(e)) + sys.exit(1) + + try: + restore_update_kernel_neighbors(intf_neigh_map) + except Exception as e: + logger.exception(str(e)) + sys.exit(1) + + # set statedb to signal other processes like neighsyncd + set_statedb_neigh_restore_done() + print "restore_neighbor service is done for system warmreboot" + return + +if __name__ == '__main__': + main() diff --git a/tests/conftest.py b/tests/conftest.py index d8f5e2f082f6..220beb6d55dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -135,6 +135,9 @@ def runcmd(self, cmd): def runcmd_async(self, cmd): return subprocess.Popen("ip netns exec %s %s" % (self.nsname, cmd), shell=True) + def runcmd_output(self, cmd): + return subprocess.check_output("ip netns exec %s %s" % (self.nsname, cmd), shell=True) + class DockerVirtualSwitch(object): def __init__(self, 
name=None, keeptb=False): self.basicd = ['redis-server', diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index ace175872258..2d2ba7af1fa9 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -293,6 +293,12 @@ def stop_neighsyncd(dvs): def start_neighsyncd(dvs): dvs.runcmd(['sh', '-c', 'supervisorctl start neighsyncd']) +def stop_restore_neighbors(dvs): + dvs.runcmd(['sh', '-c', 'pkill -x restore_neighbors']) + +def start_restore_neighbors(dvs): + dvs.runcmd(['sh', '-c', 'supervisorctl start restore_neighbors']) + def check_no_neighsyncd_timer(dvs): (exitcode, string) = dvs.runcmd(['sh', '-c', 'grep getWarmStartTimer /var/log/syslog | grep neighsyncd | grep invalid']) assert string.strip() != "" @@ -301,13 +307,37 @@ def check_neighsyncd_timer(dvs, timer_value): (exitcode, num) = dvs.runcmd(['sh', '-c', "grep getWarmStartTimer /var/log/syslog | grep neighsyncd | tail -n 1 | rev | cut -d ' ' -f 1 | rev"]) assert num.strip() == timer_value +def check_redis_neigh_entries(dvs, neigh_tbl, number): + (exitcode, lb_output) = dvs.runcmd(['sh', '-c', "redis-cli keys NEIGH_TABLE:lo* | grep NEI | wc -l"]) + lb_num = int(lb_output.strip()) + assert len(neigh_tbl.getKeys()) == number + lb_num + +def check_kernel_reachable_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip neigh show nud reachable| grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def check_kernel_reachable_v4_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip -4 neigh show nud reachable | grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def check_kernel_reachable_v6_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip -6 neigh show nud reachable | grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def kernel_restore_neighs_done(restoretbl): + keys = restoretbl.getKeys() + 
return (len(keys) > 0) + # function to check neighbor entry reconciliation status written in syslog def check_syslog_for_neighbor_entry(dvs, marker, new_cnt, delete_cnt, iptype): # check reconciliation results (new or delete entries) for ipv4 and ipv6 if iptype == "ipv4" or iptype == "ipv6": - (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep cache-state:NEW | grep -i %s | wc -l" % (marker, iptype)]) + (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep cache-state:NEW | grep -i %s | grep -v 'lo:'| wc -l" % (marker, iptype)]) assert num.strip() == str(new_cnt) - (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep -E \"cache-state:(DELETE|STALE)\" | grep -i %s | wc -l" % (marker, iptype)]) + (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep -E \"cache-state:(DELETE|STALE)\" | grep -i %s | grep -v 'lo:' | wc -l" % (marker, iptype)]) assert num.strip() == str(delete_cnt) else: assert "iptype is unknown" == "" @@ -327,6 +357,12 @@ def test_swss_neighbor_syncup(dvs, testlog): # create neighbor entries (4 ipv4 and 4 ip6, two each on each interface) in linux kernel intfs = ["Ethernet24", "Ethernet28"] + + for intf in intfs: + # set timeout to be the same as real HW + dvs.runcmd("sysctl -w net.ipv4.neigh.{}.base_reachable_time_ms=1800000".format(intf)) + dvs.runcmd("sysctl -w net.ipv6.neigh.{}.base_reachable_time_ms=1800000".format(intf)) + #enable ipv6 on docker dvs.runcmd("sysctl net.ipv6.conf.all.disable_ipv6=0") @@ -342,10 +378,10 @@ def test_swss_neighbor_syncup(dvs, testlog): macs = ["00:00:00:00:24:02", "00:00:00:00:24:03", "00:00:00:00:28:02", "00:00:00:00:28:03"] for i in range(len(ips)): - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip neigh add {} dev {} lladdr 
{} nud reachable".format(ips[i], intfs[i/2], macs[i])) for i in range(len(v6ips)): - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip -6 neigh add {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) time.sleep(1) @@ -354,7 +390,7 @@ def test_swss_neighbor_syncup(dvs, testlog): tbl = swsscommon.Table(db, "NEIGH_TABLE") for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: @@ -364,7 +400,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: @@ -384,14 +420,16 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd and sairedis.rec stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() pubsub = dvs.SubscribeAsicDbObject("SAI_OBJECT_TYPE_NEIGHBOR_ENTRY") start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # Check the neighbor entries are still in appDB correctly for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: @@ -401,7 +439,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: @@ -433,24 +471,26 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # delete even nummber of ipv4/ipv6 neighbor entries from each interface 
for i in range(0, len(ips), 2): - dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i%2])) + dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i/2])) for i in range(0, len(v6ips), 2): - dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i%2])) + dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i/2])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # check ipv4 and ipv6 neighbors for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) #should not see deleted neighbor entries - if i %2 == 0: + if i % 2 == 0: assert status == False continue else: @@ -464,9 +504,9 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) #should not see deleted neighbor entries - if i %2 == 0: + if i % 2 == 0: assert status == False continue else: @@ -504,6 +544,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # add even nummber of ipv4/ipv6 neighbor entries to each interface @@ -511,21 +552,22 @@ def test_swss_neighbor_syncup(dvs, testlog): for i in range(0, len(ips), 2): (rc, output) = dvs.runcmd(['sh', '-c', "ip -4 neigh | grep {}".format(ips[i])]) print output - if rc == 0: - dvs.runcmd("ip neigh change {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + if output: + dvs.runcmd("ip neigh change {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], macs[i])) else: - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip neigh add {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], macs[i])) for i in range(0, len(v6ips), 2): (rc, output) = 
dvs.runcmd(['sh', '-c', "ip -6 neigh | grep {}".format(v6ips[i])]) print output - if rc == 0: - dvs.runcmd("ip -6 neigh change {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + if output: + dvs.runcmd("ip -6 neigh change {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) else: - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip -6 neigh add {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # no neighsyncd timer configured @@ -533,7 +575,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # check ipv4 and ipv6 neighbors, should see all neighbors for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -542,7 +584,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -565,7 +607,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # # Testcase 5: # Even number of ip4/6 neigbors updated with new mac. - # Odd number of ipv4/6 neighbors removed and added to different interfaces. + # Odd number of ipv4/6 neighbors removed # neighbor syncd should sync it up after warm restart # include the timer settings in this testcase @@ -579,28 +621,28 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # Even number of ip4/6 neigbors updated with new mac. - # Odd number of ipv4/6 neighbors removed and added to different interfaces. 
+ # Odd number of ipv4/6 neighbors removed newmacs = ["00:00:00:01:12:02", "00:00:00:01:12:03", "00:00:00:01:16:02", "00:00:00:01:16:03"] for i in range(len(ips)): if i % 2 == 0: - dvs.runcmd("ip neigh change {} dev {} lladdr {}".format(ips[i], intfs[i%2], newmacs[i])) + dvs.runcmd("ip neigh change {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], newmacs[i])) else: - dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i%2])) - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[1-i%2], macs[i])) + dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i/2])) for i in range(len(v6ips)): if i % 2 == 0: - dvs.runcmd("ip -6 neigh change {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], newmacs[i])) + dvs.runcmd("ip -6 neigh change {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], newmacs[i])) else: - dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i%2])) - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[1-i%2], macs[i])) + dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i/2])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # timer is not expired yet, state should be "restored" @@ -613,7 +655,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # check ipv4 and ipv6 neighbors, should see all neighbors with updated info for i in range(len(ips)): if i % 2 == 0: - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -621,17 +663,12 @@ def test_swss_neighbor_syncup(dvs, testlog): if v[0] == "family": assert v[1] == "IPv4" else: - (status, fvs) = tbl.get("{}:{}".format(intfs[1-i%2], ips[i])) - assert status == True - for v in fvs: - if v[0] == "neigh": - assert v[1] == macs[i] - if v[0] == "family": - assert v[1] == "IPv4" + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) + assert status == False 
for i in range(len(v6ips)): if i % 2 == 0: - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -639,23 +676,18 @@ def test_swss_neighbor_syncup(dvs, testlog): if v[0] == "family": assert v[1] == "IPv6" else: - (status, fvs) = tbl.get("{}:{}".format(intfs[1-i%2], v6ips[i])) - assert status == True - for v in fvs: - if v[0] == "neigh": - assert v[1] == macs[i] - if v[0] == "family": - assert v[1] == "IPv6" + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) + assert status == False time.sleep(2) # check syslog and asic db for activities - # 4 news, 2 deletes for ipv4 and ipv6 each - # 4 create, 4 set, 4 removes for neighbor in asic db - check_syslog_for_neighbor_entry(dvs, marker, 4, 2, "ipv4") - check_syslog_for_neighbor_entry(dvs, marker, 4, 2, "ipv6") + # 2 news, 2 deletes for ipv4 and ipv6 each + # 4 set, 4 removes for neighbor in asic db + check_syslog_for_neighbor_entry(dvs, marker, 2, 2, "ipv4") + check_syslog_for_neighbor_entry(dvs, marker, 2, 2, "ipv6") (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) - assert nadd == 8 + assert nadd == 4 assert ndel == 4 # check restore Count @@ -787,6 +819,10 @@ def test_swss_port_state_syncup(dvs, testlog): assert oper_status == "down" else: assert oper_status == "up" + #clean up arp + dvs.runcmd("arp -d 10.0.0.1") + dvs.runcmd("arp -d 10.0.0.3") + dvs.runcmd("arp -d 10.0.0.5") ############################################################################# @@ -1442,3 +1478,285 @@ def test_routing_WarmRestart(dvs, testlog): assert rt_key['dest'] == "192.168.100.0/24" +# 'ip neigh flush all' won't remove failed entries if number of neighs less than gc_threshold1 +# Also it takes time to remove them completly. 
+# We use arp off/on to do it
+def flush_neigh_entries(dvs):
+    """Flush the neighbor entries on `dvs`.
+
+    ARP is switched off and then back on for every link in the "default"
+    group; both commands are run through `dvs.runcmd`.
+    """
+    dvs.runcmd("ip link set group default arp off")
+    dvs.runcmd("ip link set group default arp on")
+
+def test_system_warmreboot_neighbor_syncup(dvs, testlog):
+    """Check neighbor restore/reconciliation across simulated system warm reboots.
+
+    Testcase 1: learn NUM_NEIGH_PER_INTF ipv4 and ipv6 neighbors on each of
+                NUM_INTF interfaces and verify NEIGH_TABLE content.
+    Testcase 2: restart neighsyncd on a flushed kernel table; every neighbor
+                must come back with no neighbor activity in syslog/ASIC DB.
+    Testcase 3: same as 2 after half of the server ips were replaced.
+    Testcase 4: ping the new ips (adds expected), then delete the stale
+                kernel entries (deletes expected).
+    Testcase 5: same as 2 with half of the links down; the entries behind
+                the down links are expected to be deleted.
+
+    Args:
+        dvs: virtual switch fixture; provides runcmd(), servers, redis_sock,
+             add_log_marker(), SubscribeAsicDbObject(), CountSubscribedObjects().
+        testlog: fixture, not referenced in the body.
+    """
+    # NEIGH_TABLE is read through appl_db below.
+    # NOTE(review): the original opened the table on DB id 0 directly; this
+    # assumes swsscommon.APPL_DB == 0 - confirm.
+    appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
+    state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0)
+
+    #enable ipv6 on docker
+    dvs.runcmd("sysctl net.ipv6.conf.all.disable_ipv6=0")
+
+    # flush all neighs first
+    flush_neigh_entries(dvs)
+    time.sleep(5)
+
+    dvs.runcmd("config warm_restart enable system")
+
+    # Everything below runs with system warm restart enabled; the finally
+    # clause disables it again even when an assertion fails.
+    try:
+        # Test neighbors on NUM_INTF (e.g. 8) interfaces
+        # Ethernet32/36/.../60, with ip: 32.0.0.1/24... 60.0.0.1/24
+        # ipv6: 3200::1/64...6000::1/64
+        # bring up the servers' interfaces and assign NUM_NEIGH_PER_INTF (e.g. 64) ips per interface
+        # TBD: NUM_NEIGH_PER_INTF >= 128 ips will cause test framework to hang by default settings
+        # TBD: Need tune gc_thresh1/2/3 at host side of vs docker to support this.
+        NUM_INTF = 8
+        NUM_NEIGH_PER_INTF = 64 #128
+        NUM_OF_NEIGHS = (NUM_INTF*NUM_NEIGH_PER_INTF)
+        # Floor division keeps these integers on both Python 2 and Python 3;
+        # they are used as range() bounds and inside ip address strings.
+        HALF_PER_INTF = NUM_NEIGH_PER_INTF // 2
+        HALF_OF_NEIGHS = NUM_OF_NEIGHS // 2
+        # Index of the first server used; server i is attached to Ethernet<i*4>.
+        FIRST_SERVER = 8
+        server_ids = list(range(FIRST_SERVER, FIRST_SERVER + NUM_INTF))
+
+        macs = []
+        for i in server_ids:
+            port = i * 4
+            # set timeout to be the same as real HW
+            # set ip on server facing interfaces
+            # bring servers' interface up, save the macs
+            dvs.runcmd("sysctl -w net.ipv4.neigh.Ethernet{}.base_reachable_time_ms=1800000".format(port))
+            dvs.runcmd("sysctl -w net.ipv6.neigh.Ethernet{}.base_reachable_time_ms=1800000".format(port))
+            dvs.runcmd("ip addr flush dev Ethernet{}".format(port))
+            dvs.runcmd("ifconfig Ethernet{} {}.0.0.1/24 up".format(port, port))
+            dvs.runcmd("ip -6 addr add {}00::1/64 dev Ethernet{}".format(port, port))
+            dvs.servers[i].runcmd("ip link set up dev eth0")
+            dvs.servers[i].runcmd("ip addr flush dev eth0")
+            result = dvs.servers[i].runcmd_output("ifconfig eth0 | grep HWaddr | awk '{print $NF}'")
+            macs.append(result.strip())
+
+        #
+        # Testcase 1:
+        # Add neighbor entries on servers connecting to SONiC ports
+        # NUM_NEIGH_PER_INTF ipv4 and NUM_NEIGH_PER_INTF ipv6 on each server
+        # total: NUM_OF_NEIGHS ipv4 and NUM_OF_NEIGHS ipv6
+        # ping them to get the neighbor entries
+        for i in server_ids:
+            port = i * 4
+            for j in range(NUM_NEIGH_PER_INTF):
+                dvs.servers[i].runcmd("ip addr add {}.0.0.{}/24 dev eth0".format(port, j+2))
+                dvs.servers[i].runcmd("ip -6 addr add {}00::{}/64 dev eth0".format(port, j+2))
+
+        time.sleep(1)
+
+        for i in server_ids:
+            port = i * 4
+            for j in range(NUM_NEIGH_PER_INTF):
+                dvs.runcmd(['sh', '-c', "ping -c 1 -W 0 -q {}.0.0.{} > /dev/null 2>&1".format(port, j+2)])
+                dvs.runcmd(['sh', '-c', "ping6 -c 1 -W 0 -q {}00::{} > /dev/null 2>&1".format(port, j+2)])
+
+        # Check the neighbor entries are inserted correctly
+        tbl = swsscommon.Table(appl_db, "NEIGH_TABLE")
+
+        # number of neighbors should match what we configured
+        # ipv4/ipv6 entries and loopback
+        check_redis_neigh_entries(dvs, tbl, 2*NUM_OF_NEIGHS)
+
+        # All neighbor entries should match
+        for i in server_ids:
+            port = i * 4
+            mac = macs[i - FIRST_SERVER]
+            for j in range(NUM_NEIGH_PER_INTF):
+                # ipv4 entry first, then ipv6, same order as the lookups were always done
+                expected = (
+                    ("Ethernet{}:{}.0.0.{}".format(port, port, j+2), "IPv4"),
+                    ("Ethernet{}:{}00::{}".format(port, port, j+2), "IPv6"),
+                )
+                for key, family in expected:
+                    (status, fvs) = tbl.get(key)
+                    assert status
+                    for v in fvs:
+                        if v[0] == "family":
+                            assert v[1] == family
+                        if v[0] == "neigh":
+                            assert v[1] == mac
+
+        #
+        # Testcase 2:
+        # Stop neighsyncd, appDB entries should be reserved
+        # flush kernel neigh table to simulate warm reboot
+        # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel
+        # check all neighbors learned in kernel
+        # no changes should be there in syslog and sairedis.rec
+
+        # get restore_count
+        restore_count = swss_get_RestoreCount(dvs, state_db)
+
+        # stop neighsyncd and remove the NEIGH_RESTORE_TABLE|Flags entry from stateDB
+        stop_neighsyncd(dvs)
+        del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags")
+        time.sleep(3)
+        flush_neigh_entries(dvs)
+        time.sleep(3)
+
+        # check neighbors are gone
+        check_kernel_reachable_neigh_num(dvs, 0)
+
+        # start neighsyncd and restore_neighbors
+        marker = dvs.add_log_marker()
+        # this subscription is reused by every later CountSubscribedObjects call
+        pubsub = dvs.SubscribeAsicDbObject("SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
+        start_neighsyncd(dvs)
+        start_restore_neighbors(dvs)
+
+        # should finish the restore within 10 seconds
+        time.sleep(10)
+
+        check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS)
+        check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS)
+
+        # check syslog and sairedis.rec file for activities
+        check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv4")
+        check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv6")
+        (nadd, ndel) = dvs.CountSubscribedObjects(pubsub)
+        assert nadd == 0
+        assert ndel == 0
+
+        # check restore Count
+        swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd")
+
+        #
+        # Testcase 3:
+        # Stop neighsyncd, appDB entries should be reserved
+        # flush kernel neigh table to simulate warm reboot
+        # Remove half of ips of servers' interfaces, add new half of ips
+        # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel
+        # check all new neighbors learned in kernel
+        # no changes should be there in syslog and sairedis.rec
+
+        # get restore_count
+        restore_count = swss_get_RestoreCount(dvs, state_db)
+
+        # stop neighsyncd and remove the NEIGH_RESTORE_TABLE|Flags entry from stateDB
+        stop_neighsyncd(dvs)
+        del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags")
+
+        # Delete the second half of the ips and add a new half of ips
+        # note: the first ipv4 can not be deleted only
+        for i in server_ids:
+            port = i * 4
+            for j in range(HALF_PER_INTF):
+                stale_host = j + HALF_PER_INTF + 2
+                new_host = j + NUM_NEIGH_PER_INTF + 2
+                dvs.servers[i].runcmd("ip addr del {}.0.0.{}/24 dev eth0".format(port, stale_host))
+                dvs.servers[i].runcmd("ip -6 addr del {}00::{}/64 dev eth0".format(port, stale_host))
+                dvs.servers[i].runcmd("ip addr add {}.0.0.{}/24 dev eth0".format(port, new_host))
+                dvs.servers[i].runcmd("ip -6 addr add {}00::{}/64 dev eth0".format(port, new_host))
+
+        flush_neigh_entries(dvs)
+        time.sleep(3)
+
+        # check neighbors are gone
+        check_kernel_reachable_neigh_num(dvs, 0)
+
+        # start neighsyncd and restore_neighbors
+        marker = dvs.add_log_marker()
+        start_neighsyncd(dvs)
+        start_restore_neighbors(dvs)
+
+        # should finish the restore within 10 seconds
+        time.sleep(10)
+
+        check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS)
+        check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS)
+
+        # check syslog and sairedis.rec file for activities
+        check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv4")
+        check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv6")
+        (nadd, ndel) = dvs.CountSubscribedObjects(pubsub)
+        assert nadd == 0
+        assert ndel == 0
+
+        # check restore Count
+        swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd")
+
+        # Testcase 4:
+        # ping the new ips, should get it into appDB
+        marker = dvs.add_log_marker()
+
+        for i in server_ids:
+            port = i * 4
+            for j in range(HALF_PER_INTF):
+                new_host = j + NUM_NEIGH_PER_INTF + 2
+                dvs.runcmd(['sh', '-c', "ping -c 1 -W 0 -q {}.0.0.{} > /dev/null 2>&1".format(port, new_host)])
+                dvs.runcmd(['sh', '-c', "ping6 -c 1 -W 0 -q {}00::{} > /dev/null 2>&1".format(port, new_host)])
+
+        check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS + HALF_OF_NEIGHS)
+        check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS + HALF_OF_NEIGHS)
+        check_redis_neigh_entries(dvs, tbl, 2*(NUM_OF_NEIGHS + HALF_OF_NEIGHS))
+
+        (nadd, ndel) = dvs.CountSubscribedObjects(pubsub)
+        assert nadd == NUM_OF_NEIGHS #ipv4 and ipv6
+        assert ndel == 0
+
+        # Remove stale entries manually
+        for i in server_ids:
+            port = i * 4
+            for j in range(HALF_PER_INTF):
+                stale_host = j + HALF_PER_INTF + 2
+                dvs.runcmd(['sh', '-c', "ip neigh del {}.0.0.{} dev Ethernet{}".format(port, stale_host, port)])
+                dvs.runcmd(['sh', '-c', "ip -6 neigh del {}00::{} dev Ethernet{}".format(port, stale_host, port)])
+
+        time.sleep(5)
+
+        check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS)
+        check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS)
+        check_redis_neigh_entries(dvs, tbl, 2*NUM_OF_NEIGHS)
+
+        (nadd, ndel) = dvs.CountSubscribedObjects(pubsub)
+        assert nadd == 0
+        assert ndel == NUM_OF_NEIGHS #ipv4 and ipv6
+
+        #
+        # Testcase 5:
+        # Stop neighsyncd, appDB entries should be reserved
+        # flush kernel neigh table to simulate warm reboot
+        # keep half of the interface down
+        # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel
+        # check all new neighbors with interface up to be learned in kernel
+        # syslog/sai log should show half of the entries stale/deleted
+
+        # get restore_count
+        restore_count = swss_get_RestoreCount(dvs, state_db)
+
+        # stop neighsyncd and remove the NEIGH_RESTORE_TABLE|Flags entry from stateDB
+        stop_neighsyncd(dvs)
+        del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags")
+        time.sleep(3)
+
+        flush_neigh_entries(dvs)
+        time.sleep(3)
+
+        # check neighbors are gone
+        check_kernel_reachable_neigh_num(dvs, 0)
+
+        # bring down half of the links
+        for i in range(FIRST_SERVER, FIRST_SERVER + NUM_INTF // 2):
+            dvs.runcmd("ip link set down dev Ethernet{}".format(i*4))
+
+        # start neighsyncd and restore_neighbors
+        marker = dvs.add_log_marker()
+        start_neighsyncd(dvs)
+        start_restore_neighbors(dvs)
+
+        # restore for up interfaces should be done within 10 seconds
+        time.sleep(10)
+
+        check_kernel_reachable_v4_neigh_num(dvs, HALF_OF_NEIGHS)
+        check_kernel_reachable_v6_neigh_num(dvs, HALF_OF_NEIGHS)
+
+        restoretbl = swsscommon.Table(state_db, swsscommon.STATE_NEIGH_RESTORE_TABLE_NAME)
+
+        # waited 10 above already
+        # NOTE(review): this loop has no upper bound; it only ends once
+        # kernel_restore_neighs_done() reports true - consider adding a timeout.
+        waited = 10
+        while (not kernel_restore_neighs_done(restoretbl)):
+            # single-argument print(...) behaves the same on Python 2 and Python 3
+            print("Waiting for kernel neighbors restore process done: {} seconds".format(waited))
+            time.sleep(10)
+            waited += 10
+
+        time.sleep(10)
+
+        # check syslog and sairedis.rec file for activities
+        check_syslog_for_neighbor_entry(dvs, marker, 0, HALF_OF_NEIGHS, "ipv4")
+        check_syslog_for_neighbor_entry(dvs, marker, 0, HALF_OF_NEIGHS, "ipv6")
+        (nadd, ndel) = dvs.CountSubscribedObjects(pubsub)
+        assert nadd == 0
+        assert ndel == NUM_OF_NEIGHS
+
+        # check restore Count
+        swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd")
+    finally:
+        # disable system warm restart
+        dvs.runcmd("config warm_restart disable system")
+