Skip to content

Commit

Permalink
thermalctld: Add support for fans on non-CPU modules (#555)
Browse files Browse the repository at this point in the history
* thermalctld: Add support for fans on non-CPU modules

* Add module fan to unit tests
  • Loading branch information
patrickmacarthur authored Dec 6, 2024
1 parent 1962fca commit 60e7224
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 11 deletions.
37 changes: 27 additions & 10 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Thermal control daemon for SONiC
"""

from enum import Enum, auto
import signal
import sys
import threading
Expand Down Expand Up @@ -61,18 +62,23 @@ def update_entity_info(table, parent_name, key, device, device_index):
table.set(key, fvs)


class FanType(Enum):
DRAWER = auto()
PSU = auto()
MODULE = auto()

class FanStatus(logger.Logger):
absent_fan_count = 0
faulty_fan_count = 0

def __init__(self, fan=None, is_psu_fan=False):
def __init__(self, fan=None, fan_type=FanType.DRAWER):
"""
Initializer of FanStatus
"""
super(FanStatus, self).__init__(SYSLOG_IDENTIFIER)

self.fan = fan
self.is_psu_fan = is_psu_fan
self.fan_type = fan_type
self.presence = True
self.status = True
self.under_speed = False
Expand All @@ -95,7 +101,7 @@ class FanStatus(logger.Logger):
:param presence: Fan presence status
:return: True if status changed else False
"""
if not presence and not self.is_psu_fan:
if not presence and self.fan_type == FanType.DRAWER:
FanStatus.absent_fan_count += 1

if presence == self.presence:
Expand Down Expand Up @@ -228,16 +234,25 @@ class FanUpdater(logger.Logger):
if self.task_stopping_event.is_set():
return
try:
self._refresh_fan_status(drawer, drawer_index, fan, fan_index)
self._refresh_fan_status(drawer, drawer_index, fan, fan_index, FanType.DRAWER)
except Exception as e:
self.log_warning('Failed to update fan status - {}'.format(repr(e)))

for module_index, module in enumerate(self.chassis.get_all_modules()):
for fan_index, fan in enumerate(module.get_all_fans()):
if self.task_stopping_event.is_set():
return
try:
self._refresh_fan_status(module, module_index, fan, fan_index, FanType.MODULE)
except Exception as e:
self.log_warning('Failed to update module fan status - {}'.format(repr(e)))

for psu_index, psu in enumerate(self.chassis.get_all_psus()):
for fan_index, fan in enumerate(psu.get_all_fans()):
if self.task_stopping_event.is_set():
return
try:
self._refresh_fan_status(psu, psu_index, fan, fan_index, True)
self._refresh_fan_status(psu, psu_index, fan, fan_index, FanType.PSU)
except Exception as e:
self.log_warning('Failed to update PSU fan status - {}'.format(repr(e)))

Expand Down Expand Up @@ -270,7 +285,7 @@ class FanUpdater(logger.Logger):

self.drawer_table.set(drawer_name, fvs)

def _refresh_fan_status(self, parent, parent_index, fan, fan_index, is_psu_fan=False):
def _refresh_fan_status(self, parent, parent_index, fan, fan_index, fan_type=FanType.DRAWER):
"""
Get Fan status by platform API and write to database for a given Fan
:param parent: Parent device of this fan
Expand All @@ -280,15 +295,17 @@ class FanUpdater(logger.Logger):
:param name_prefix: name prefix of Fan object if Fan.get_name not presented
:return:
"""
drawer_name = NOT_AVAILABLE if is_psu_fan else str(try_get(parent.get_name))
if is_psu_fan:
drawer_name = NOT_AVAILABLE if fan_type != FanType.DRAWER else str(try_get(parent.get_name))
if fan_type == FanType.PSU:
parent_name = 'PSU {}'.format(parent_index + 1)
elif fan_type == FanType.MODULE:
parent_name = 'Module {}'.format(parent_index + 1)
else:
parent_name = drawer_name if drawer_name != NOT_AVAILABLE else CHASSIS_INFO_KEY
fan_name = try_get(fan.get_name, '{} fan {}'.format(parent_name, fan_index + 1))
update_entity_info(self.phy_entity_table, parent_name, fan_name, fan, fan_index + 1)
if fan_name not in self.fan_status_dict:
self.fan_status_dict[fan_name] = FanStatus(fan, is_psu_fan)
self.fan_status_dict[fan_name] = FanStatus(fan, fan_type)

fan_status = self.fan_status_dict[fan_name]

Expand Down Expand Up @@ -342,7 +359,7 @@ class FanUpdater(logger.Logger):

# We don't set PSU led here, PSU led will be handled in psud
if set_led:
if not is_psu_fan:
if fan_type == FanType.DRAWER:
self._set_fan_led(parent, fan, fan_name, fan_status)

if fan_fault_status != NOT_AVAILABLE:
Expand Down
2 changes: 2 additions & 0 deletions sonic-thermalctld/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,10 @@ def make_module_thermal(self):
sfp._thermal_list.append(MockThermal())
psu = MockPsu()
psu._thermal_list.append(MockThermal())
fan = MockFan()
module._sfp_list.append(sfp)
module._psu_list.append(psu)
module._fan_list.append(fan)
module._thermal_list.append(MockThermal())

def is_modular_chassis(self):
Expand Down
22 changes: 21 additions & 1 deletion sonic-thermalctld/tests/test_thermalctld.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from sonic_py_common import daemon_base

from .mock_platform import MockChassis, MockFan, MockPsu, MockSfp, MockThermal
from .mock_platform import MockChassis, MockFan, MockModule, MockPsu, MockSfp, MockThermal
from .mock_swsscommon import Table

daemon_base.db_connect = mock.MagicMock()
Expand Down Expand Up @@ -267,6 +267,26 @@ def test_update_psu_fans(self):
else:
fan_updater.log_warning.assert_called_with("Failed to update PSU fan status - Exception('Test message',)")

def test_update_module_fans(self):
chassis = MockChassis()
module = MockModule()
mock_fan = MockFan()
chassis.set_modular_chassis(True)
module._fan_list.append(mock_fan)
chassis._module_list.append(module)
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
fan_updater.update()
assert fan_updater.log_warning.call_count == 0

fan_updater._refresh_fan_status = mock.MagicMock(side_effect=Exception("Test message"))
fan_updater.update()
assert fan_updater.log_warning.call_count == 1

# TODO: Clean this up once we no longer need to support Python 2
if sys.version_info.major == 3:
fan_updater.log_warning.assert_called_with("Failed to update module fan status - Exception('Test message')")
else:
fan_updater.log_warning.assert_called_with("Failed to update module fan status - Exception('Test message',)")

class TestThermalMonitor(object):
"""
Expand Down

0 comments on commit 60e7224

Please sign in to comment.