Skip to content

Commit

Permalink
Add warmboot infra for testing with a sonic neighbor (sonic-net#7961)
Browse files Browse the repository at this point in the history
* Add initial framework for testing warm reboot with SONiC neighbors

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Add support for choosing between Arista and Sonic implementation based on neighbor type

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Include timestamps in log output for easier time comparisons from log

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Bug fixes for when there are no port channel flaps

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Update SONiC warm reboot test case

Handle multiple port channel interfaces, update LACP PDU timing
collection code to current version, clean up imports a bit.

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* fixup! Update SONiC warm reboot test case

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Fix some precommit errors

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Add return in case of attempts exceeded

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Reduce to 5 attempts.

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Fix pre-commit checks

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Fix more precommit issues

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Fail the warmboot test if there's a port channel flap for SONiC neighbor

If testing with a SONiC neighbor, fail the warmboot test if there is a
port channel flap. This is in preparation for the teamd retry count
feature.

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Implement verify_neigh_lag_no_flap

This is used on actual hardware warm-reboot tests.

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Remove unused import

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Remove unused log message, and add log when failing to get the output for a command

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Remove unused variable

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

* Add retry count flag for the warm-reboot command on supported images.

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>

---------

Signed-off-by: Saikrishna Arcot <sarcot@microsoft.com>
  • Loading branch information
saiarcot895 authored and parmarkj committed Oct 3, 2023
1 parent 77ac36e commit b25346b
Show file tree
Hide file tree
Showing 5 changed files with 602 additions and 5 deletions.
26 changes: 22 additions & 4 deletions ansible/roles/test/files/ptftests/advanced-reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
from fcntl import ioctl
from collections import defaultdict
from device_connection import DeviceConnection
from arista import Arista
from host_device import HostDevice


class StateMachine():
Expand Down Expand Up @@ -178,6 +178,7 @@ def __init__(self):
self.check_param('bgp_v4_v6_time_diff', 40, required=False)
self.check_param('asic_type', '', required=False)
self.check_param('logfile_suffix', None, required=False)
self.check_param('neighbor_type', 'eos', required=False)
if not self.test_params['preboot_oper'] or self.test_params['preboot_oper'] == 'None':
self.test_params['preboot_oper'] = None
if not self.test_params['inboot_oper'] or self.test_params['inboot_oper'] == 'None':
Expand Down Expand Up @@ -1444,7 +1445,10 @@ def neigh_lag_status_check(self):
Ensure there are no interface flaps after warm-boot
"""
for neigh in self.ssh_targets:
self.neigh_handle = Arista(neigh, None, self.test_params)
self.test_params['port_channel_intf_idx'] = [x['ptf_ports'][0] for x in self.vm_dut_map.values()
if x['mgmt_addr'] == neigh]
self.neigh_handle = HostDevice.getHostDeviceInstance(self.test_params['neighbor_type'], neigh,
None, self.test_params)
self.neigh_handle.connect()
fails, flap_cnt = self.neigh_handle.verify_neigh_lag_no_flap()
self.neigh_handle.disconnect()
Expand Down Expand Up @@ -1496,8 +1500,19 @@ def reboot_dut(self):

self.log("Rebooting remote side")
if self.reboot_type != 'service-warm-restart' and self.test_params['other_vendor_flag'] is False:
# Check to see if the warm-reboot script knows about the retry count feature
stdout, stderr, return_code = self.dut_connection.execCommand(
"sudo " + self.reboot_type, timeout=30)
"sudo " + self.reboot_type + " -h", timeout=5)
if "retry count" in stdout:
if self.test_params['neighbor_type'] == "sonic":
stdout, stderr, return_code = self.dut_connection.execCommand(
"sudo " + self.reboot_type + " -N", timeout=30)
else:
stdout, stderr, return_code = self.dut_connection.execCommand(
"sudo " + self.reboot_type + " -n", timeout=30)
else:
stdout, stderr, return_code = self.dut_connection.execCommand(
"sudo " + self.reboot_type, timeout=30)

elif self.test_params['other_vendor_flag'] is True:
ignore_db_integrity_check = " -d"
Expand Down Expand Up @@ -1587,7 +1602,10 @@ def cmd(self, cmds):

def peer_state_check(self, ip, queue):
self.log('SSH thread for VM {} started'.format(ip))
ssh = Arista(ip, queue, self.test_params, log_cb=self.log)
self.test_params['port_channel_intf_idx'] = [x['ptf_ports'][0] for x in self.vm_dut_map.values()
if x['mgmt_addr'] == ip]
ssh = HostDevice.getHostDeviceInstance(self.test_params['neighbor_type'], ip, queue,
self.test_params, log_cb=self.log)
self.fails[ip], self.info[ip], self.cli_info[ip], self.logs_info[ip], self.lacp_pdu_times[ip] = ssh.run()
self.log('SSH thread for VM {} finished'.format(ip))

Expand Down
4 changes: 3 additions & 1 deletion ansible/roles/test/files/ptftests/arista.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
from operator import itemgetter
from collections import defaultdict

import host_device

class Arista(object):

class Arista(host_device.HostDevice):
DEBUG = False
# unit: second
SSH_CMD_TIMEOUT = 10
Expand Down
33 changes: 33 additions & 0 deletions ansible/roles/test/files/ptftests/host_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
class HostDevice(object):
    """Common interface for neighbor devices driven by the reboot tests.

    Concrete implementations are obtained through
    :meth:`getHostDeviceInstance`. Every other method defined here is a
    placeholder that raises ``NotImplementedError`` until a subclass
    overrides it.
    """

    @staticmethod
    def getHostDeviceInstance(neighbor_type, *args, **kwargs):
        """Build the device handler that matches ``neighbor_type``.

        Args:
            neighbor_type: ``"eos"`` selects ``arista.Arista``; ``"sonic"``
                selects ``sonic.Sonic``.
            *args: Positional arguments forwarded unchanged to the selected
                class constructor.
            **kwargs: Keyword arguments forwarded unchanged to the selected
                class constructor.

        Returns:
            A new instance of the selected implementation class.

        Raises:
            NotImplementedError: If ``neighbor_type`` is not a supported
                value.
        """
        # The imports are deferred to call time because arista imports this
        # module to subclass HostDevice; importing it at module level here
        # would be circular. Only the module that is actually needed is
        # imported, so a problem in one implementation cannot break tests
        # that use the other one.
        if neighbor_type == "eos":
            import arista
            return arista.Arista(*args, **kwargs)
        if neighbor_type == "sonic":
            import sonic
            return sonic.Sonic(*args, **kwargs)
        raise NotImplementedError(
            "Unsupported neighbor type: {!r}".format(neighbor_type))

    def connect(self):
        """Open the session to the device; must be overridden."""
        raise NotImplementedError

    def disconnect(self):
        """Close the session to the device; must be overridden."""
        raise NotImplementedError

    def disconect(self):
        """Misspelled legacy name kept for compatibility.

        Delegates to :meth:`disconnect` so that existing references to the
        old spelling keep working.
        """
        return self.disconnect()

    def run(self):
        """Run the device monitoring routine; must be overridden."""
        raise NotImplementedError

    def verify_neigh_lag_no_flap(self):
        """Check the neighbor LAG for flaps; must be overridden."""
        raise NotImplementedError

    def change_bgp_neigh_state(self, asn, is_up=True):
        """Change the state of a BGP neighbor; must be overridden."""
        raise NotImplementedError

    def change_bgp_route(self, cfg_map):
        """Change BGP route configuration; must be overridden."""
        raise NotImplementedError

    def verify_bgp_neigh_state(self, dut=None, state="Active"):
        """Verify the state of a BGP neighbor; must be overridden."""
        raise NotImplementedError
Loading

0 comments on commit b25346b

Please sign in to comment.