Skip to content

Commit

Permalink
[system-health] Improve code structure of system health CLIs (#2453)
Browse files Browse the repository at this point in the history
  • Loading branch information
Junchao-Mellanox authored and StormLiangMS committed Dec 30, 2022
1 parent 488e571 commit cff4fed
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 135 deletions.
194 changes: 62 additions & 132 deletions show/system_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,144 +5,82 @@
from tabulate import tabulate
import utilities_common.cli as clicommon

#
# 'system-health' command ("show system-health")
#
@click.group(cls=clicommon.AliasedGroup, name='system-health')
def system_health():
    """Show system-health information"""
    # The group callback itself does nothing; the subcommands registered
    # on it carry all of the behavior. The docstring above is shown by
    # click as the group's help text, so it is kept verbatim.
    pass

def get_system_health_status():
    """Run the system health check and return its raw results.

    Selects the health-checker backend (test mocks when the environment
    variable UTILITIES_UNIT_TESTING is "1", the real manager and chassis
    otherwise), runs the check once and initializes the system LED.

    Returns:
        A ``(manager, chassis, stat)`` tuple: the health checker manager,
        the chassis object that was checked, and the value returned by
        ``manager.check(chassis)``.

    Raises:
        SystemExit: with status 1 when the real backend is selected and the
            caller is not root, or when the health configuration file is
            reported missing by the manager.
    """
    # Use .get(): in a normal (non-test) run the variable is unset, and a
    # plain subscript would raise KeyError instead of selecting the real
    # backend below.
    if os.environ.get("UTILITIES_UNIT_TESTING") == "1":
        # Make the repository root importable so the test mocks resolve.
        modules_path = os.path.join(os.path.dirname(__file__), "..")
        sys.path.insert(0, modules_path)
        from tests.system_health_test import MockerManager
        from tests.system_health_test import MockerChassis
        HealthCheckerManager = MockerManager
        Chassis = MockerChassis
    else:
        # A non-zero effective uid means the caller is not root.
        if os.geteuid():
            click.echo("Root privileges are required for this operation")
            # sys.exit rather than the site-provided exit(), which is meant
            # for the interactive shell and is not guaranteed to exist.
            sys.exit(1)
        from health_checker.manager import HealthCheckerManager
        from sonic_platform.chassis import Chassis

    manager = HealthCheckerManager()
    if not manager.config.config_file_exists():
        click.echo("System health configuration file not found, exit...")
        sys.exit(1)

    chassis = Chassis()
    stat = manager.check(chassis)
    # NOTE(review): the spelling "initizalize" is the one used by every call
    # in this file; presumably it is the platform API's real name -- confirm
    # before "correcting" it.
    chassis.initizalize_system_led()

    return manager, chassis, stat
def display_system_health_summary(stat, led):
    """Print the system health summary.

    Args:
        stat: mapping of category name to a mapping of element name to a
            dict that provides at least the 'status' and 'message' keys.
        led: text describing the system status LED; appended to the header
            line as-is.

    Every element whose status is not "OK" is reported. Elements in the
    'Services' category are listed by name, as "Not Accessible" when their
    message contains 'Accessible' and as "Not Running" otherwise. Elements
    of any other category contribute their message to the hardware reasons.
    """
    click.echo("System status summary\n\n System status LED " + led)

    services_list = []
    fs_list = []
    device_list = []
    for category, elements in stat.items():
        for name, info in elements.items():
            if info['status'] == "OK":
                continue
            # Classify exactly once, by category; the message text is only
            # used to tell the two kinds of 'Services' failure apart.
            if category != 'Services':
                device_list.append(info['message'])
            elif 'Accessible' in info['message']:
                fs_list.append(name)
            else:
                services_list.append(name)

    if services_list or fs_list:
        click.echo(" Services:\n Status: Not OK")
    else:
        click.echo(" Services:\n Status: OK")
    if services_list:
        click.echo(" Not Running: " + ', '.join(services_list))
    if fs_list:
        click.echo(" Not Accessible: " + ', '.join(fs_list))

    if device_list:
        click.echo(" Hardware:\n Status: Not OK")
        # Reasons are reported most-recently-collected first.
        reasons = device_list[::-1]
        click.echo(" Reasons: " + reasons[0])
        if len(reasons) > 1:
            click.echo('\n'.join("\t " + reason for reason in reasons[1:]))
    else:
        click.echo(" Hardware:\n Status: OK")

def display_monitor_list(stat):
    """Print a table of every monitored service and device.

    Args:
        stat: mapping of category name to a mapping of element name to a
            dict that provides at least the 'status' and 'type' keys.

    Rows are grouped per category (in the mapping's iteration order) and,
    within a category, sorted by their status string.
    """
    click.echo('\nSystem services and devices monitor list\n')
    header = ['Name', 'Status', 'Type']
    table = []
    # Only the per-category element mappings are needed; iterating the
    # values once keeps each element to exactly one row.
    for elements in stat.values():
        for name, info in sorted(elements.items(), key=lambda x: x[1]['status']):
            table.append([name, info['status'], info['type']])
    click.echo(tabulate(table, header))


def display_ignore_list(manager):
header = ['Name', 'Status', 'Type']
click.echo('\nSystem services and devices ignore list\n')
table = []
if manager.config.ignore_services:
Expand All @@ -161,43 +99,35 @@ def detail():
table.append(entry)
click.echo(tabulate(table, header))

#
# 'system-health' command ("show system-health")
#
@click.group(cls=clicommon.AliasedGroup, name='system-health')
def system_health():
    """Show system-health information"""
    # Nothing to do at group level: each subcommand does its own work.
    # The docstring doubles as click's help text and is left unchanged.
    pass

@system_health.command()
def summary():
    """Show system-health summary information"""
    # The manager (first item) is not needed for the summary view.
    health = get_system_health_status()
    chassis, stat = health[1], health[2]
    led = chassis.get_status_led()
    display_system_health_summary(stat, led)


@system_health.command()
def detail():
    """Show system-health detail information"""
    manager, chassis, stat = get_system_health_status()

    # Same summary as `summary`, followed by the two per-element tables.
    led = chassis.get_status_led()
    display_system_health_summary(stat, led)

    display_monitor_list(stat)
    display_ignore_list(manager)


@system_health.command()
def monitor_list():
    """Show system-health monitored services and devices name list"""
    # Backend selection, privilege/config checks and the health check itself
    # all live in get_system_health_status(); running them here as well
    # would perform the check twice and print the table twice.
    _, _, stat = get_system_health_status()
    display_monitor_list(stat)


@system_health.group('sysready-status',invoke_without_command=True)
Expand Down
6 changes: 3 additions & 3 deletions tests/system_health_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_health_summary(self):
System status LED red
Services:
Status: Not OK
Not Running: 'telemetry', 'snmp_subagent'
Not Running: telemetry, snmp_subagent
Hardware:
Status: OK
"""
Expand Down Expand Up @@ -171,7 +171,7 @@ def test_health_detail(self):
System status LED red
Services:
Status: Not OK
Not Running: 'telemetry', 'sflowmgrd'
Not Running: telemetry, sflowmgrd
Hardware:
Status: Not OK
Reasons: Failed to get voltage minimum threshold data for PSU 1
Expand Down Expand Up @@ -243,7 +243,7 @@ def test_health_detail(self):
System status LED red
Services:
Status: Not OK
Not Running: 'telemetry', 'sflowmgrd'
Not Running: telemetry, sflowmgrd
Hardware:
Status: OK
Expand Down

0 comments on commit cff4fed

Please sign in to comment.