Skip to content

Commit

Permalink
[platform] Implement platform phase 2 cases (#980)
Browse files Browse the repository at this point in the history
* [platform] Implement platform phase 2 cases

Implement the SONiC platform phase 2 test cases using the
pytest-ansible framework.

Signed-off-by: Xin Wang <xinw@mellanox.com>

* [platform] Add interface status checking using the interface_facts module

* [platform] Fix some minor issues

* Run the reboot command in the background to avoid a command failure caused by
  the SSH connection breaking before the command returns
* Fine-tune the reboot wait timeout values
* Add a delay before checking interface status because the intfutil
  command may not produce output in time

Signed-off-by: Xin Wang <xinw@mellanox.com>
  • Loading branch information
wangxin authored and liat-grozovik committed Jul 21, 2019
1 parent 93fa55e commit 6ea0101
Show file tree
Hide file tree
Showing 10 changed files with 691 additions and 28 deletions.
83 changes: 83 additions & 0 deletions tests/platform/check_critical_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Helper script for checking status of critical services
This script contains re-usable functions for checking status of critical services.
"""
import time
import logging

from utilities import wait_until

# Names of the services iterated by critical_services_fully_started() and check_critical_services().
critical_services = ["swss", "syncd", "database", "teamd", "bgp", "pmon", "lldp"]


def get_service_status(dut, service):
    """
    @summary: Get the ActiveState and SubState of a service. This function uses the systemctl tool to get the
        ActiveState and SubState of specified service.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param service: Service name.
    @return: Returns a dictionary containing ActiveState and SubState of the specified service, for example:
        {
            "ActiveState": "active",
            "SubState": "running"
        }
        Output lines that do not contain "=" are ignored.
    """
    output = dut.command("systemctl -p ActiveState -p SubState show %s" % service)
    result = {}
    for line in output["stdout_lines"]:
        # Split on the first "=" only, so a value that itself contains "=" is kept intact.
        key, separator, value = line.partition("=")
        if separator:
            result[key] = value
    return result


def service_fully_started(dut, service):
    """
    @summary: Check whether the specified service is fully started on DUT. According to the SONiC design, the last
        instruction in service starting script is to run "docker wait <service_name>". This function takes
        advantage of this design to check whether a service has been fully started. The trick is to check whether
        "docker wait <service_name>" exists in current running processes.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param service: Service name.
    @return: Return True if the specified service is fully started. Otherwise return False. Any ordinary error
        raised while running or reading the command is also reported as False.
    """
    try:
        output = dut.command('pgrep -f "docker wait %s"' % service)
        return bool(output["stdout_lines"])
    except Exception:
        # Best effort: a failing command counts as "not started yet" (presumably dut.command raises when
        # pgrep finds no process -- confirm against the ansible host wrapper). Only Exception is caught, so
        # KeyboardInterrupt and SystemExit still propagate.
        return False


def critical_services_fully_started(dut):
    """
    @summary: Report whether every service listed in critical_services is fully started on DUT.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @return: True only if service_fully_started() reported True for each critical service, otherwise False.
    """
    # Every service is queried (no short-circuit) so the debug log shows the complete picture.
    started_by_service = {name: service_fully_started(dut, name) for name in critical_services}
    logging.debug("Status of critical services: %s" % str(started_by_service))
    return all(started_by_service.values())


def check_critical_services(dut):
    """
    @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all
        services must be "active". SubState of all services must be "running".
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @raise AssertionError: If the services are not fully started in time or a service has an unexpected state.
    """
    logging.info("Wait until all critical services are fully started")
    assert wait_until(300, 20, critical_services_fully_started, dut), "Not all critical services are fully started"

    logging.info("Check critical service status")
    for service in critical_services:
        status = get_service_status(dut, service)
        # Use .get() so a property missing from the systemctl output fails the assertion below with a
        # readable message instead of raising KeyError.
        active_state = status.get("ActiveState")
        sub_state = status.get("SubState")
        assert active_state == "active", \
            "ActiveState of %s is %s, expected: active" % (service, active_state)
        assert sub_state == "running", \
            "SubState of %s is %s, expected: running" % (service, sub_state)
57 changes: 57 additions & 0 deletions tests/platform/check_interface_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Helper script for checking status of interfaces
This script contains re-usable functions for checking status of interfaces on SONiC.
"""
import logging


def parse_intf_status(lines):
    """
    @summary: Turn the output lines of command "intfutil description" into a per-interface dictionary.
    @param lines: The output lines of command "intfutil description". Lines with fewer than five
        whitespace-separated fields are skipped.
    @return: Return a dictionary keyed by interface name, like:
        {
            "Ethernet0": {
                "oper": "up",
                "admin": "up",
                "alias": "etp1",
                "desc": "ARISTA01T2:Ethernet1"
            },
            ...
        }
    """
    parsed = {}
    for row in lines:
        columns = row.split()
        if len(columns) < 5:
            continue
        name, oper_state, admin_state, alias_name = columns[:4]
        parsed[name] = {
            "oper": oper_state,
            "admin": admin_state,
            "alias": alias_name,
            # Everything after the fourth column is the description, re-joined with single spaces.
            "desc": ' '.join(columns[4:]),
        }
    return parsed


def check_interface_status(dut, interfaces):
    """
    @summary: Check the admin and oper status of the specified interfaces on DUT. An interface listed in the
        minigraph ports is expected to be up/up, any other interface is expected to be down/down.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @raise AssertionError: If an interface is missing from the output, has an unexpected state, or the
        interface_facts module reports link-down ports.
    """
    logging.info("Check interface status using cmd 'intfutil'")
    mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"]
    output = dut.command("intfutil description")
    # The first two output lines are skipped (presumably the table header and separator -- confirm).
    intf_status = parse_intf_status(output["stdout_lines"][2:])
    for intf in interfaces:
        expected_oper = "up" if intf in mg_ports else "down"
        expected_admin = "up" if intf in mg_ports else "down"
        assert intf in intf_status, "Missing status for interface %s" % intf
        status = intf_status[intf]
        assert status["oper"] == expected_oper, \
            "Oper status of interface %s is %s, expected '%s'" % (intf, status["oper"], expected_oper)
        # Compare against expected_admin (the original compared admin status against expected_oper).
        assert status["admin"] == expected_admin, \
            "Admin status of interface %s is %s, expected '%s'" % (intf, status["admin"], expected_admin)

    logging.info("Check interface status using the interface_facts module")
    intf_facts = dut.interface_facts(up_ports=mg_ports)["ansible_facts"]
    down_ports = intf_facts["ansible_interface_link_down_ports"]
    assert len(down_ports) == 0, "Some interfaces are down: %s" % str(down_ports)
119 changes: 119 additions & 0 deletions tests/platform/check_transceiver_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Helper script for checking status of transceivers
This script contains re-usable functions for checking status of transceivers.
"""
import logging
import re
import json


def parse_transceiver_info(output_lines):
    """
    @summary: Extract the interface names from the keys of DB table TRANSCEIVER_INFO.
    @param output_lines: Key lines output by the 'redis' command; each must start with
        "TRANSCEIVER_INFO|Ethernet<number>".
    @return: List of the interface names ("Ethernet<number>"), in input order.
    @raise AssertionError: If a line does not start with the expected key prefix.
    """
    match_key = re.compile(r"TRANSCEIVER_INFO\|(Ethernet\d+)").match
    transceivers = []
    for entry in output_lines:
        found = match_key(entry)
        assert found, "Unexpected line %s" % entry
        transceivers.append(found.group(1))
    return transceivers


def parse_transceiver_dom_sensor(output_lines):
    """
    @summary: Extract the interface names from the keys of DB table TRANSCEIVER_DOM_SENSOR.
    @param output_lines: Key lines output by the 'redis' command; each must start with
        "TRANSCEIVER_DOM_SENSOR|Ethernet<number>".
    @return: List of the interface names ("Ethernet<number>"), in input order.
    @raise AssertionError: If a line does not start with the expected key prefix.
    """
    match_key = re.compile(r"TRANSCEIVER_DOM_SENSOR\|(Ethernet\d+)").match
    transceivers = []
    for entry in output_lines:
        found = match_key(entry)
        assert found, "Unexpected line %s" % entry
        transceivers.append(found.group(1))
    return transceivers


def all_transceivers_detected(dut, interfaces):
    """
    @summary: Check if transceiver information of all the specified interfaces have been detected, i.e. a
        "TRANSCEIVER_INFO|<interface>" key is listed by redis-cli for each of them.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @return: Return True if a key was found for every interface. Otherwise log the missing interfaces at
        debug level and return False.
    """
    # "\\*" spells out the same backslash-star the original wrote as the invalid escape sequence "\*".
    db_output = dut.command("redis-cli --raw -n 6 keys TRANSCEIVER_INFO\\*")["stdout_lines"]
    detected_keys = set(db_output)  # set gives constant-time membership tests in the loop below
    not_detected_interfaces = [intf for intf in interfaces if "TRANSCEIVER_INFO|%s" % intf not in detected_keys]
    if not_detected_interfaces:
        logging.debug("Interfaces not detected: %s" % str(not_detected_interfaces))
        return False
    return True


def check_transceiver_basic(dut, interfaces):
    """
    @summary: Verify that each specified interface has a key in the TRANSCEIVER_INFO table (redis DB 6).
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @raise AssertionError: If an interface has no TRANSCEIVER_INFO key.
    """
    logging.info("Check whether transceiver information of all ports are in redis")
    keys_output = dut.command("redis-cli -n 6 keys TRANSCEIVER_INFO*")
    known_transceivers = parse_transceiver_info(keys_output["stdout_lines"])
    for port in interfaces:
        assert port in known_transceivers, "TRANSCEIVER INFO of %s is not found in DB" % port


def check_transceiver_details(dut, interfaces):
    """
    @summary: Verify that the TRANSCEIVER_INFO hash of each specified interface mentions every expected field.
        The check is a plain substring search in the "redis-cli hgetall" output.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @raise AssertionError: If an expected field name does not occur in the output for an interface.
    """
    logging.info("Check detailed transceiver information of each connected port")
    required_fields = ("type", "hardwarerev", "serialnum", "manufacturename", "modelname")
    for port in interfaces:
        hash_dump = dut.command('redis-cli -n 6 hgetall "TRANSCEIVER_INFO|%s"' % port)
        for field_name in required_fields:
            assert field_name in hash_dump["stdout"], \
                "Expected field %s is not found in %s while checking %s" % (field_name, hash_dump["stdout"], port)


def check_transceiver_dom_sensor_basic(dut, interfaces):
    """
    @summary: Verify that each specified interface has a key in the TRANSCEIVER_DOM_SENSOR table (redis DB 6).
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @raise AssertionError: If an interface has no TRANSCEIVER_DOM_SENSOR key.
    """
    logging.info("Check whether TRANSCEIVER_DOM_SENSOR of all ports in redis")
    keys_output = dut.command("redis-cli -n 6 keys TRANSCEIVER_DOM_SENSOR*")
    known_sensors = parse_transceiver_dom_sensor(keys_output["stdout_lines"])
    for port in interfaces:
        assert port in known_sensors, "TRANSCEIVER_DOM_SENSOR of %s is not found in DB" % port


def check_transceiver_dom_sensor_details(dut, interfaces):
    """
    @summary: Verify that the TRANSCEIVER_DOM_SENSOR hash of each specified interface mentions every expected
        field. The check is a plain substring search in the "redis-cli hgetall" output.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    @raise AssertionError: If an expected field name does not occur in the output for an interface.
    """
    logging.info("Check detailed TRANSCEIVER_DOM_SENSOR information of each connected ports")
    required_fields = (
        "temperature", "voltage",
        "rx1power", "rx2power", "rx3power", "rx4power",
        "tx1bias", "tx2bias", "tx3bias", "tx4bias",
        "tx1power", "tx2power", "tx3power", "tx4power",
    )
    for port in interfaces:
        hash_dump = dut.command('redis-cli -n 6 hgetall "TRANSCEIVER_DOM_SENSOR|%s"' % port)
        for field_name in required_fields:
            assert field_name in hash_dump["stdout"], \
                "Expected field %s is not found in %s while checking %s" % (field_name, hash_dump["stdout"], port)


def check_transceiver_status(dut, interfaces):
    """
    @summary: Run all transceiver checks against redis DB for the specified interfaces: TRANSCEIVER_INFO keys
        and details first, then TRANSCEIVER_DOM_SENSOR keys and details.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @param interfaces: List of interfaces that need to be checked.
    """
    # Order matters only in that the first failing check raises and stops the sequence.
    checks = (
        check_transceiver_basic,
        check_transceiver_details,
        check_transceiver_dom_sensor_basic,
        check_transceiver_dom_sensor_details,
    )
    for run_check in checks:
        run_check(dut, interfaces)
44 changes: 44 additions & 0 deletions tests/platform/mellanox/check_hw_mgmt_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Helper function for checking the hw-management service
"""
import logging
import re

from utilities import wait_until


def fan_speed_set_to_default(dut):
    """
    @summary: Tell whether the fan PWM setting on DUT equals 153, the value the checks in this file
        describe as the 60% default (153/255).
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @return: True if the stripped content of /var/run/hw-management/thermal/pwm1 is "153", otherwise False.
    """
    pwm_output = dut.command("cat /var/run/hw-management/thermal/pwm1")
    return pwm_output["stdout"].strip() == "153"


def wait_until_fan_speed_set_to_default(dut):
    """
    @summary: Poll fan_speed_set_to_default() through wait_until(300, 10, ...) until it succeeds
        (presumably a 300 second timeout and a 10 second interval -- confirm against utilities.wait_until).
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @return: The result of wait_until(), so callers can detect that the fan speed never reached the
        default. The result was previously discarded; callers that ignore it are unaffected.
    """
    return wait_until(300, 10, fan_speed_set_to_default, dut)


def check_hw_management_service(dut):
    """
    @summary: Check the hw-management service and related settings: systemd state, the thermal control
        process, the thermal control suspend flag, the fan speed setting and the dmesg content.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @raise AssertionError: If any of the checks fails.
    """
    logging.info("Check service status using systemctl")
    hw_mgmt_service_state = dut.command("systemctl -p ActiveState -p SubState show hw-management")
    assert "ActiveState=active" in hw_mgmt_service_state["stdout"], "The hw-management service is not active"
    assert "SubState=exited" in hw_mgmt_service_state["stdout"], "The hw-management service is not exited"

    logging.info("Check the thermal control process")
    tc_pid = dut.command("pgrep -f /usr/bin/hw-management-thermal-control.sh")
    assert re.match(r"\d+", tc_pid["stdout"]), "The hw-management-thermal-control process is not running"

    logging.info("Check thermal control status")
    tc_suspend = dut.command("cat /var/run/hw-management/config/suspend")
    assert tc_suspend["stdout"] == "1", "Thermal control is not suspended"

    logging.info("Check fan speed setting")
    fan_speed_setting = dut.command("cat /var/run/hw-management/thermal/pwm1")
    assert fan_speed_setting["stdout"] == "153", "Fan speed is not default to 60%. 153/255=60%"

    logging.info("Check dmesg")
    dmesg = dut.command("sudo dmesg")
    error_keywords = ["crash", "Out of memory", "Call Trace", "Exception", "panic"]
    for err_kw in error_keywords:
        # re.search scans the whole output; re.match would only look at the very beginning of it and
        # therefore miss a keyword on any later line.
        assert not re.search(err_kw, dmesg["stdout"], re.I), \
            "Found error keyword %s in dmesg: %s" % (err_kw, dmesg["stdout"])
73 changes: 73 additions & 0 deletions tests/platform/mellanox/check_sysfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
Helper script for checking status of sysfs.
This script contains re-usable functions for checking status of hw-management related sysfs.
"""
import logging


def check_sysfs(dut):
    """
    @summary: Check various hw-management related sysfs under /var/run/hw-management: broken symbolic links,
        the thermal control suspend flag, the PWM setting, the ASIC temperature and the fan status, fault,
        minimum/maximum speed, current speed and speed setting files.
    @param dut: The ansible_host object of DUT. For interacting with DUT.
    @raise AssertionError: If any file has unexpected or unreadable content.
    """
    logging.info("Check broken symbolinks")
    broken_symbolinks = dut.command("find /var/run/hw-management -xtype l")
    assert len(broken_symbolinks["stdout_lines"]) == 0, \
        "Found some broken symbolinks: %s" % str(broken_symbolinks["stdout_lines"])

    logging.info("Check content of some key files")

    file_suspend = dut.command("cat /var/run/hw-management/config/suspend")
    assert file_suspend["stdout"] == "1", "Content of /var/run/hw-management/config/suspend should be 1"

    file_pwm1 = dut.command("cat /var/run/hw-management/thermal/pwm1")
    assert file_pwm1["stdout"] == "153", "Content of /var/run/hw-management/thermal/pwm1 should be 153"

    file_asic = dut.command("cat /var/run/hw-management/thermal/asic")
    # Only the conversion is guarded. The range assertion stays outside the try block so that its
    # AssertionError cannot be swallowed by the handler.
    try:
        asic_temp = float(file_asic["stdout"]) / 1000
    except (TypeError, ValueError):
        raise AssertionError("Bad content in /var/run/hw-management/thermal/asic: %s" % file_asic["stdout"])
    # The raw value is divided by 1000 (presumably millidegrees Celsius -- confirm).
    assert 0 < asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"]

    for fan_status in _find_thermal_files(dut, "fan*_status"):
        fan_status_content = dut.command("cat %s" % fan_status)
        assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status

    for fan_fault in _find_thermal_files(dut, "fan*_fault"):
        fan_fault_content = dut.command("cat %s" % fan_fault)
        assert fan_fault_content["stdout"] == "0", "Content of %s is not 0" % fan_fault

    for fan_min in _find_thermal_files(dut, "fan*_min"):
        fan_min_speed = _read_sysfs_int(dut, fan_min)
        assert fan_min_speed > 0, "Bad fan minimum speed: %s" % str(fan_min_speed)

    for fan_max in _find_thermal_files(dut, "fan*_max"):
        fan_max_speed = _read_sysfs_int(dut, fan_max)
        assert fan_max_speed > 10000, "Bad fan maximum speed: %s" % str(fan_max_speed)

    for fan_speed_get in _find_thermal_files(dut, "fan*_speed_get"):
        fan_speed = _read_sysfs_int(dut, fan_speed_get)
        assert fan_speed > 1000, "Bad fan speed: %s" % str(fan_speed)

    for fan_speed_set in _find_thermal_files(dut, "fan*_speed_set"):
        fan_speed_set_content = dut.command("cat %s" % fan_speed_set)
        assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255"


def _find_thermal_files(dut, name_pattern):
    """Return the output lines of "find" for name_pattern under /var/run/hw-management/thermal on DUT."""
    return dut.command("find /var/run/hw-management/thermal -name %s" % name_pattern)["stdout_lines"]


def _read_sysfs_int(dut, path):
    """Read path on DUT and return its content as an int; raise AssertionError if reading or parsing fails."""
    try:
        return int(dut.command("cat %s" % path)["stdout"])
    except Exception as e:
        # The original wrote 'assert "<message>"', which is always true and silently hid the failure.
        raise AssertionError("Get content from %s failed, exception: %s" % (path, repr(e)))
Loading

0 comments on commit 6ea0101

Please sign in to comment.