Skip to content

Commit

Permalink
[Mellanox] Adjust PSU voltage WA (#10619)
Browse files Browse the repository at this point in the history
- Why I did it
InvalidPsuVolWA.run might raise an exception if the user powers off the PSU while it is running. This exception is not caught and propagates to psud, which causes psud to fail to update PSU data in the DB.

- How I did it
1. Change the log level used when the WA does not work. This can happen when the user powers off the PSU, so a warning is more appropriate than an error.
2. Change the wait time from 5 seconds to 1 second to avoid introducing too much delay in psud. In my tests, 1 second is usually enough.
3. Give the functions get_voltage_low_threshold and get_voltage_high_threshold a default return value so that exceptions do not reach psud.

- How to verify it
Manual test.
Run sonic-mgmt regression
  • Loading branch information
Junchao-Mellanox authored and pull[bot] committed Jul 13, 2024
1 parent 2436a21 commit 2373ca9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
18 changes: 12 additions & 6 deletions platform/mellanox/mlnx-platform-api/sonic_platform/psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ def get_temperature_high_threshold(self):

return None

@utils.default_return(None)
def get_voltage_high_threshold(self):
"""
Retrieves the high threshold PSU voltage output
Expand All @@ -414,10 +415,12 @@ def get_voltage_high_threshold(self):
if 'max' in capability:
max_voltage = utils.read_int_from_file(self.psu_voltage_max, log_func=logger.log_info)
max_voltage = InvalidPsuVolWA.run(self, max_voltage, self.psu_voltage_max)
return float(max_voltage) / 1000
if max_voltage:
return float(max_voltage) / 1000

return None

@utils.default_return(None)
def get_voltage_low_threshold(self):
"""
Retrieves the low threshold PSU voltage output
Expand All @@ -435,7 +438,8 @@ def get_voltage_low_threshold(self):
if 'min' in capability:
min_voltage = utils.read_int_from_file(self.psu_voltage_min, log_func=logger.log_info)
min_voltage = InvalidPsuVolWA.run(self, min_voltage, self.psu_voltage_min)
return float(min_voltage) / 1000
if min_voltage:
return float(min_voltage) / 1000

return None

Expand Down Expand Up @@ -471,7 +475,7 @@ class InvalidPsuVolWA:
EXPECT_PLATFORMS = ['x86_64-mlnx_msn3700-r0', 'x86_64-mlnx_msn3700c-r0', 'x86_64-mlnx_msn3800-r0', 'x86_64-mlnx_msn4600c-r0']
MFR_FIELD = 'MFR_NAME'
CAPACITY_FIELD = 'CAPACITY'
WAIT_TIME = 5
WAIT_TIME = 1

@classmethod
def run(cls, psu, threshold_value, threshold_file):
Expand Down Expand Up @@ -499,8 +503,8 @@ def run(cls, psu, threshold_value, threshold_file):
logger.log_warning('PSU {} threshold file {} value {}, but its capacity is {}'.format(psu.index, threshold_file, threshold_value, capacity))
return threshold_value

# Run a sensor -s command to triger hardware to get the real threashold value
utils.run_command('sensor -s')
# Run a sensors -s command to triger hardware to get the real threashold value
utils.run_command('sensors -s')

# Wait for the threshold value change
return cls.wait_set_done(threshold_file)
Expand All @@ -516,5 +520,7 @@ def wait_set_done(cls, threshold_file):
wait_time -= 1
time.sleep(1)

logger.log_error('sensor -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME))
# It is enough to use warning here because user might power off/on the PSU which may cause threshold_file
# does not exist
logger.log_warning('sensors -s does not recover PSU threshold sensor after {} seconds'.format(cls.WAIT_TIME))
return None
2 changes: 1 addition & 1 deletion platform/mellanox/mlnx-platform-api/tests/test_psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,4 @@ def get_entry_value(key):
# Normal
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
mock_run_command.assert_called_with('sensor -s')
mock_run_command.assert_called_with('sensors -s')

0 comments on commit 2373ca9

Please sign in to comment.