Skip to content

Commit

Permalink
[Mellanox] support new platform api, thermal and psu part (#3175)
Browse files Browse the repository at this point in the history
* support new platform api, thermal and psu part
for psu, all APIs are supported.
for thermal, we support
  get_temperature,
  get_high_threshold
for the thermal sensors of cpu core, cpu pack, psu and sfp module
and get_temperature for the ambient thermal sensors around the asic, port, fan, comex and board.

* 1. address review comments
2. improve the handling of PSU inserting/removal
3. tolerate diverse PSU thermal sensor file name conventions

* 1. adjust thermal code according to the latest version of hw-management
2. check power_good_status, rather than whether the files exist, before reading the voltage, current and power of the PSU
  • Loading branch information
stephenxs authored and lguohan committed Jul 22, 2019
1 parent 40c8bc1 commit 1d15022
Show file tree
Hide file tree
Showing 3 changed files with 498 additions and 34 deletions.
27 changes: 18 additions & 9 deletions platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@
#
#############################################################################

import sys

try:
from sonic_platform_base.chassis_base import ChassisBase
from sonic_platform.psu import Psu
from sonic_platform.fan import Fan
from sonic_platform.fan import FAN_PATH
from sonic_platform.sfp import SFP
from sonic_platform.thermal import Thermal, initialize_thermals
from sonic_platform.watchdog import get_watchdog
from sonic_daemon_base.daemon_base import Logger
from eeprom import Eeprom
from os import listdir
from os.path import isfile, join
import sys
import io
import re
import subprocess
Expand Down Expand Up @@ -64,12 +64,12 @@
COMPONENT_CPLD2 = "CPLD2"

# Global logger class instance
SYSLOG_IDENTIFIER = "mlnx-chassis"
SYSLOG_IDENTIFIER = "mlnx-chassis-api"
logger = Logger(SYSLOG_IDENTIFIER)

# magic code definition for port number, qsfp port position of each hwsku
# port_position_tuple = (PORT_START, QSFP_PORT_START, PORT_END, PORT_IN_BLOCK, EEPROM_OFFSET)
hwsku_dict = {'ACS-MSN2700': 0, "LS-SN2700":0, 'ACS-MSN2740': 0, 'ACS-MSN2100': 1, 'ACS-MSN2410': 2, 'ACS-MSN2010': 3, 'ACS-MSN3700': 0, 'ACS-MSN3700C': 0, 'Mellanox-SN2700': 0, 'Mellanox-SN2700-D48C8': 0}
hwsku_dict_port = {'ACS-MSN2700': 0, "LS-SN2700":0, 'ACS-MSN2740': 0, 'ACS-MSN2100': 1, 'ACS-MSN2410': 2, 'ACS-MSN2010': 3, 'ACS-MSN3700': 0, 'ACS-MSN3700C': 0, 'Mellanox-SN2700': 0, 'Mellanox-SN2700-D48C8': 0}
port_position_tuple_list = [(0, 0, 31, 32, 1), (0, 0, 15, 16, 1), (0, 48, 55, 56, 1),(0, 18, 21, 22, 1)]

class Chassis(ChassisBase):
Expand All @@ -78,9 +78,12 @@ class Chassis(ChassisBase):
def __init__(self):
super(Chassis, self).__init__()

# Initialize SKU name
self.sku_name = self._get_sku_name()

# Initialize PSU list
for index in range(MLNX_NUM_PSU):
psu = Psu(index)
psu = Psu(index, self.sku_name)
self._psu_list.append(psu)

# Initialize watchdog
Expand Down Expand Up @@ -112,6 +115,9 @@ def __init__(self):
sfp_module = SFP(index, 'SFP')
self._sfp_list.append(sfp_module)

# Initialize thermals
initialize_thermals(self.sku_name, self._thermal_list, self._psu_list)

# Initialize EEPROM
self.eeprom = Eeprom()

Expand All @@ -137,10 +143,13 @@ def _extract_num_of_fans_and_fan_drawers(self):

return num_of_fan, num_of_drawer

def _get_port_position_tuple_by_sku_name(self):
def _get_sku_name(self):
p = subprocess.Popen(GET_HWSKU_CMD, shell=True, stdout=subprocess.PIPE)
out, err = p.communicate()
position_tuple = port_position_tuple_list[hwsku_dict[out.rstrip('\n')]]
return out.rstrip('\n')

def _get_port_position_tuple_by_sku_name(self):
    """
    Look up the port position tuple that belongs to this chassis' SKU

    The SKU name stored in self.sku_name is mapped to an index through
    hwsku_dict_port, and that index selects an entry of
    port_position_tuple_list.

    Returns:
        The matching entry of port_position_tuple_list
    Raises:
        KeyError: if self.sku_name is not a key of hwsku_dict_port
    """
    profile_index = hwsku_dict_port[self.sku_name]
    return port_position_tuple_list[profile_index]

def get_base_mac(self):
Expand Down Expand Up @@ -183,8 +192,8 @@ def _read_generic_file(self, filename, len):
result = fileobj.read(len)
fileobj.close()
return result
except:
logger.log_warning("Fail to read file {}, maybe it doesn't exist".format(filename))
except Exception as e:
logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
return ''

def _verify_reboot_cause(self, filename):
Expand Down
148 changes: 123 additions & 25 deletions platform/mellanox/mlnx-platform-api/sonic_platform/psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,113 @@
#
#############################################################################

import os.path

try:
import os.path
from sonic_platform_base.psu_base import PsuBase
from sonic_daemon_base.daemon_base import Logger
from sonic_platform.fan import Fan
except ImportError as e:
raise ImportError (str(e) + "- required module not found")

# Global logger class instance
SYSLOG_IDENTIFIER = "mlnx-psu-api"
logger = Logger(SYSLOG_IDENTIFIER)

psu_list = []

PSU_CURRENT = "current"
PSU_VOLTAGE = "voltage"
PSU_POWER = "power"

# SKUs with unpluggable PSUs:
# 1. don't have psuX_status and should be treated as always present
# 2. don't have voltage, current and power values
hwsku_dict_with_unplugable_psu = ['ACS-MSN2010', 'ACS-MSN2100']

# in most SKUs the file psuX_curr, psuX_volt and psuX_power contain current, voltage and power data respectively.
# but there are exceptions which will be handled by the following dictionary
hwsku_dict_psu = {'ACS-MSN3700': 1, 'ACS-MSN3700C': 1, 'ACS-MSN3800': 1}
psu_profile_list = [
# default filename convention
{
PSU_CURRENT : "power/psu{}_curr",
PSU_VOLTAGE : "power/psu{}_volt",
PSU_POWER : "power/psu{}_power"
},
# for 3700, 3700c, 3800
{
PSU_CURRENT : "power/psu{}_curr",
PSU_VOLTAGE : "power/psu{}_volt_out2",
PSU_POWER : "power/psu{}_power"
}
]

class Psu(PsuBase):
"""Platform-specific Psu class"""
def __init__(self, psu_index):
def __init__(self, psu_index, sku):
global psu_list
PsuBase.__init__(self)
# PSU is 1-based on Mellanox platform
self.index = psu_index + 1
psu_list.append(self.index)
self.psu_path = "/var/run/hw-management/thermal/"
self.psu_oper_status = "psu{}_pwr_status".format(self.index)
self.psu_presence = "psu{}_status".format(self.index)
if os.path.exists(os.path.join(self.psu_path, self.psu_presence)):
self.presence_file_exists = True
self.psu_path = "/var/run/hw-management/"
psu_oper_status = "thermal/psu{}_pwr_status".format(self.index)
#psu_oper_status should always be present for all SKUs
self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status)

if sku in hwsku_dict_psu:
filemap = psu_profile_list[hwsku_dict_psu[sku]]
else:
filemap = psu_profile_list[0]

if sku in hwsku_dict_with_unplugable_psu:
self.always_presence = True
self.psu_voltage = None
self.psu_current = None
self.psu_power = None
self.psu_presence = None
else:
self.presence_file_exists = False
self.always_presence = False
psu_voltage = filemap[PSU_VOLTAGE].format(self.index)
psu_voltage = os.path.join(self.psu_path, psu_voltage)
self.psu_voltage = psu_voltage

psu_current = filemap[PSU_CURRENT].format(self.index)
psu_current = os.path.join(self.psu_path, psu_current)
self.psu_current = psu_current

psu_power = filemap[PSU_POWER].format(self.index)
psu_power = os.path.join(self.psu_path, psu_power)
self.psu_power = psu_power

psu_presence = "thermal/psu{}_status".format(self.index)
psu_presence = os.path.join(self.psu_path, psu_presence)
self.psu_presence = psu_presence

fan = Fan(psu_index, psu_index, True)
if fan.get_presence():
self._fan = fan

def get_status(self):
def _read_generic_file(self, filename, len):
    """
    Read a file and return its whole content parsed as an integer

    Args:
        filename: path of the file to read
        len: not used by this implementation
    Returns:
        The integer parsed from the file content, or 0 if the file
        could not be opened, read or parsed
    """
    try:
        with open(filename, 'r') as handle:
            return int(handle.read())
    except Exception as e:
        # Best effort: the failure is only logged, the caller gets 0
        logger.log_info("Fail to read file {} due to {}".format(filename, repr(e)))
    return 0

def get_powergood_status(self):
"""
Retrieves the operational status of power supply unit (PSU) defined
Returns:
bool: True if PSU is operating properly, False if not
"""
status = 0
try:
with open(os.path.join(self.psu_path, self.psu_oper_status), 'r') as power_status:
status = int(power_status.read())
except (ValueError, IOError):
status = 0
status = self._read_generic_file(os.path.join(self.psu_path, self.psu_oper_status), 0)

return status == 1

Expand All @@ -60,15 +125,48 @@ def get_presence(self):
Returns:
bool: True if PSU is present, False if not
"""
status = 0
if self.presence_file_exists:
try:
with open(os.path.join(self.psu_path, self.psu_presence), 'r') as presence_status:
status = int(presence_status.read())
except (ValueError, IOError):
status = 0
if self.always_presence:
return self.always_presence
else:
status = self.index in psu_list
status = self._read_generic_file(self.psu_presence, 0)
return status == 1

return status == 1
def get_voltage(self):
    """
    Retrieves the output voltage currently reported by the PSU

    Returns:
        A float number, the output voltage in volts, e.g. 12.1,
        or None if this PSU has no voltage file configured or its
        power-good status is not set
    """
    # Nothing to report without a voltage file and a power-good PSU
    if self.psu_voltage is None or not self.get_powergood_status():
        return None

    # The raw reading is scaled down by 1000 to obtain volts
    raw_reading = self._read_generic_file(self.psu_voltage, 0)
    return float(raw_reading) / 1000

def get_current(self):
    """
    Retrieves the electric current presently supplied by the PSU

    Returns:
        A float number, the electric current in amperes, e.g. 15.4,
        or None if this PSU has no current file configured or its
        power-good status is not set
    """
    # Nothing to report without a current file and a power-good PSU
    if self.psu_current is None or not self.get_powergood_status():
        return None

    # The raw reading is scaled down by 1000 to obtain amperes
    raw_reading = self._read_generic_file(self.psu_current, 0)
    return float(raw_reading) / 1000

def get_power(self):
    """
    Retrieves the power presently supplied by the PSU

    Returns:
        A float number, the power in watts, e.g. 302.6,
        or None if this PSU has no power file configured or its
        power-good status is not set
    """
    # Nothing to report without a power file and a power-good PSU
    if self.psu_power is None or not self.get_powergood_status():
        return None

    # The raw reading is scaled down by 1000000 to obtain watts
    raw_reading = self._read_generic_file(self.psu_power, 0)
    return float(raw_reading) / 1000000
Loading

0 comments on commit 1d15022

Please sign in to comment.