diff --git a/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json new file mode 100644 index 000000000000..f7d107d33faf --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json @@ -0,0 +1,65 @@ +{ + "info_types": [ + { + "type": "fan_info" + }, + { + "type": "psu_info" + } + ], + "policies": [ + { + "name": "any fan absence", + "conditions": [ + { + "type": "fan.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "any psu absence", + "conditions": [ + { + "type": "psu.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "all fan and psu presence", + "conditions": [ + { + "type": "fan.all.presence" + }, + { + "type": "psu.all.presence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "true" + } + ] + } + ] +} \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/dockers/docker-dhcp-relay/start.sh b/dockers/docker-dhcp-relay/start.sh index 0ac5ea1a10ec..54e58dd42a07 100755 --- a/dockers/docker-dhcp-relay/start.sh +++ b/dockers/docker-dhcp-relay/start.sh @@ -16,6 +16,12 @@ if [ $(supervisorctl status | grep -c "^isc-dhcp-relay:") -gt 0 ]; then # lifetime of the process. /usr/bin/wait_for_intf.sh + # Allow a bit more time for interfaces to settle before starting the + # relay agent processes. + # FIXME: Remove/decrease this once we determine how to prevent future race + # conditions here. + sleep 180 + # Start all DHCP relay agent(s) supervisorctl start isc-dhcp-relay:* fi diff --git a/files/build_templates/dhcp_relay.service.j2 b/files/build_templates/dhcp_relay.service.j2 index 9106e29a41e2..d501a663feba 100644 --- a/files/build_templates/dhcp_relay.service.j2 +++ b/files/build_templates/dhcp_relay.service.j2 @@ -1,6 +1,6 @@ [Unit] Description=DHCP relay container -Requires=updategraph.service swss.service teamd.service +Requires=updategraph.service After=updategraph.service swss.service syncd.service teamd.service Before=ntp-config.service StartLimitIntervalSec=1200 @@ -15,4 +15,4 @@ Restart=always RestartSec=30 [Install] -WantedBy=multi-user.target swss.service teamd.service +WantedBy=multi-user.target diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 13d5432e09ea..f62ef44f0fc0 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -253,6 +253,11 @@ sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd.service $FILESYSTEM_ROOT/etc/systemd/s echo "caclmgrd.service" | sudo tee -a $GENERATED_SERVICE_FILE sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd $FILESYSTEM_ROOT/usr/bin/ +# Copy process/docker cpu/memory utilization data export daemon +sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd.service $FILESYSTEM_ROOT/etc/systemd/system/ +echo "procdockerstatsd.service" | sudo tee -a $GENERATED_SERVICE_FILE +sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd $FILESYSTEM_ROOT/usr/bin/ + # Copy process-reboot-cause service files sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause.service $FILESYSTEM_ROOT/etc/systemd/system/ echo "process-reboot-cause.service" | sudo tee -a $GENERATED_SERVICE_FILE diff --git a/files/image_config/hostcfgd/hostcfgd b/files/image_config/hostcfgd/hostcfgd index b32e3d8cd668..7fa8ed2ba205 100755 --- a/files/image_config/hostcfgd/hostcfgd +++ b/files/image_config/hostcfgd/hostcfgd @@ -304,6 +304,45 @@ class HostConfigDaemon: add = False self.iptables.iptables_handler(key, data, add) + + def feature_status_handler(self, key, data): + status_data = self.config_db.get_table('FEATURE') + for key in status_data.keys(): + if not key: + syslog.syslog(syslog.LOG_WARNING, "FEATURE key is missing") + return + status = status_data[key]['status'] + if not status: + syslog.syslog(syslog.LOG_WARNING, "status is missing for {}".format(key)) + return + if status == "enabled": + start_cmds=[] + start_cmds.append("sudo systemctl enable {}".format(key)) + start_cmds.append("sudo systemctl start {}".format(key)) + for cmd in start_cmds: + syslog.syslog(syslog.LOG_INFO, "Running cmd - {}".format(cmd)) + try: + subprocess.check_call(cmd, shell=True) + except subprocess.CalledProcessError as err: + syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}" + .format(err.cmd, err.returncode, err.output)) + return + syslog.syslog(syslog.LOG_INFO, "Feature '{}' is enabled and started".format(key)) + elif status == "disabled": + stop_cmds=[] + stop_cmds.append("sudo systemctl stop {}".format(key)) + stop_cmds.append("sudo systemctl disable {}".format(key)) + for cmd in stop_cmds: + syslog.syslog(syslog.LOG_INFO, "Running cmd - {}".format(cmd)) + try: + subprocess.check_call(cmd, shell=True) + except subprocess.CalledProcessError as err: + syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}" + .format(err.cmd, err.returncode, err.output)) + return + syslog.syslog(syslog.LOG_INFO, "Feature '{}' is stopped and disabled".format(key)) + else: + syslog.syslog(syslog.LOG_ERR, "Unexpected status value '{}' for '{}'".format(status, key)) def start(self): self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data)) @@ -311,6 +350,7 @@ class HostConfigDaemon: self.config_db.subscribe('TACPLUS', lambda table, key, data: self.tacacs_global_handler(key, data)) self.config_db.subscribe('DEVICE_METADATA', lambda table, key, data: self.hostname_handler(key, data)) self.config_db.subscribe('LOOPBACK_INTERFACE', lambda table, key, data: self.lpbk_handler(key, data)) + self.config_db.subscribe('FEATURE', lambda table, key, data: self.feature_status_handler(key, data)) self.config_db.listen() @@ -321,4 +361,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/files/image_config/procdockerstatsd/procdockerstatsd b/files/image_config/procdockerstatsd/procdockerstatsd new file mode 100644 index 000000000000..66d2d45009d5 --- /dev/null +++ b/files/image_config/procdockerstatsd/procdockerstatsd @@ -0,0 +1,214 @@ +# !/usr/bin/env python +''' +procdockerstatsd +Daemon which periodically gathers process and docker statistics and pushes the data to STATE_DB +''' + +import os +import re +import subprocess +import sys +import syslog +import time +from datetime import datetime + +import swsssdk + +VERSION = '1.0' + +SYSLOG_IDENTIFIER = "procdockerstatsd" + +REDIS_HOSTIP = "127.0.0.1" + +# ========================== Syslog wrappers ========================== +def log_info(msg, also_print_to_console=False): + syslog.openlog(SYSLOG_IDENTIFIER) + syslog.syslog(syslog.LOG_INFO, msg) + syslog.closelog() + +def log_warning(msg, also_print_to_console=False): + syslog.openlog(SYSLOG_IDENTIFIER) + syslog.syslog(syslog.LOG_WARNING, msg) + syslog.closelog() + +def log_error(msg, also_print_to_console=False): + syslog.openlog(SYSLOG_IDENTIFIER) + syslog.syslog(syslog.LOG_ERR, msg) + syslog.closelog() + +# ========================== ProcessDocker class ========================== +class ProcDockerStats: + + def __init__(self): + self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) + self.state_db.connect("STATE_DB") + + def run_command(self, cmd): + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + (stdout, stderr) = proc.communicate() + if proc.returncode != 0: + log_error("Error running command '{}'".format(cmd)) + return None + else: + return stdout + + def format_docker_cmd_output(self, cmdout): + lines = re.split("\n", cmdout) + keys = re.split(" +", lines[0]) + docker_data = dict() + docker_data_list = [] + for item in lines[1:]: + values1 = re.split(" +", item) + docker_data = dict(zip(keys, values1)) + docker_data_list.append(docker_data) + formatted_dict = self.create_docker_dict(docker_data_list) + return formatted_dict + + def format_process_cmd_output(self, cmdout): + lines = re.split("\n", cmdout) + keys = re.split(" +", lines[0]) + keylist = list(filter(None, keys)) + process_data = dict() + process_data_list = [] + for item in lines[1:]: + values1 = re.split(" +", str(item)) + # To remove extra space before UID + val = list(filter(None, values1)) + # Merging extra columns created due to space in cmd ouput + val[8:] = [''.join(val[8:])] + process_data = dict(zip(keylist, val)) + process_data_list.append(process_data) + return process_data_list + + def convert_to_bytes(self, value): + unit_value = re.search('[a-zA-Z]+', value) + value_to_convert = float(filter(str.isdigit, value)) + unit = unit_value.group(0) + UNITS_B = 'B' + UNITS_KB = 'KB' + UNITS_MB = 'MB' + UNITS_MiB = 'MiB' + UNITS_GiB = 'GiB' + if unit.lower() == UNITS_B.lower(): + return int(round(value_to_convert)) + elif unit.lower() == UNITS_KB.lower(): + value_converted = value_to_convert * 1000 + return int(round(value_converted)) + elif unit.lower() == UNITS_MB.lower(): + value_converted = value_to_convert * 1000 * 1000 + return int(round(value_converted)) + elif unit.lower() == UNITS_MiB.lower(): + value_converted = value_to_convert * 1024 * 1024 + return int(round(value_converted)) + elif unit.lower() == UNITS_GiB.lower(): + value_converted = value_to_convert * 1024 * 1024 * 1024 + return int(round(value_converted)) + + def create_docker_dict(self, dict_list): + dockerdict = {} + for row in dict_list[0:]: + cid = row.get('CONTAINER ID') + if cid: + key = 'DOCKER_STATS|' + str(cid) + dockerdict[key] = {} + dockerdict[key]['NAME'] = row.get('NAME') + + splitcol = row.get('CPU %') + cpu = re.split("%", str(splitcol)) + dockerdict[key]['CPU%'] = str(cpu[0]) + + splitcol = row.get('MEM USAGE / LIMIT') + memuse = re.split(" / ", str(splitcol)) + # converting MiB and GiB to bytes + dockerdict[key]['MEM_BYTES'] = str(self.convert_to_bytes(memuse[0])) + dockerdict[key]['MEM_LIMIT_BYTES'] = str(self.convert_to_bytes(memuse[1])) + + splitcol = row.get('MEM %') + mem = re.split("%", str(splitcol)) + dockerdict[key]['MEM%'] = str(mem[0]) + + splitcol = row.get('NET I/O') + netio = re.split(" / ", str(splitcol)) + dockerdict[key]['NET_IN_BYTES'] = str(self.convert_to_bytes(netio[0])) + dockerdict[key]['NET_OUT_BYTES'] = str(self.convert_to_bytes(netio[1])) + + splitcol = row.get('BLOCK I/O') + blockio = re.split(" / ", str(splitcol)) + dockerdict[key]['BLOCK_IN_BYTES'] = str(self.convert_to_bytes(blockio[0])) + dockerdict[key]['BLOCK_OUT_BYTES'] = str(self.convert_to_bytes(blockio[1])) + + dockerdict[key]['PIDS'] = row.get('PIDS') + return dockerdict + + def update_dockerstats_command(self): + cmd = "docker stats --no-stream -a" + data = self.run_command(cmd) + if not data: + log_error("'{}' returned null output".format(cmd)) + return False + dockerdata = self.format_docker_cmd_output(data) + if not dockerdata: + log_error("formatting for docker output failed") + return False + # wipe out all data from state_db before updating + self.state_db.delete_all_by_pattern('STATE_DB', 'DOCKER_STATS|*') + for k1,v1 in dockerdata.iteritems(): + for k2,v2 in v1.iteritems(): + self.update_state_db(k1, k2, v2) + return True + + def update_processstats_command(self): + data = self.run_command("ps -eo uid,pid,ppid,%mem,%cpu,stime,tty,time,cmd --sort -%cpu | head -1024") + processdata = self.format_process_cmd_output(data) + value = "" + # wipe out all data before updating with new values + self.state_db.delete_all_by_pattern('STATE_DB', 'PROCESS_STATS|*') + for row in processdata[0:]: + cid = row.get('PID') + if cid: + value = 'PROCESS_STATS|' + str(cid) + uid = row.get('UID') + self.update_state_db(value, 'UID', uid) + ppid = row.get('PPID') + self.update_state_db(value, 'PPID', ppid) + cpu = row.get('%CPU') + self.update_state_db(value, '%CPU', str(cpu)) + mem = row.get('%MEM') + self.update_state_db(value, '%MEM', str(mem)) + stime = row.get('STIME') + self.update_state_db(value, 'STIME', stime) + tty = row.get('TT') + self.update_state_db(value, 'TT', tty) + time = row.get('TIME') + self.update_state_db(value, 'TIME', time) + cmd = row.get('CMD') + self.update_state_db(value, 'CMD', cmd) + + def update_state_db(self, key1, key2, value2): + self.state_db.set('STATE_DB', key1, key2, value2) + + def run(self): + self.update_dockerstats_command() + datetimeobj = datetime.now() + # Adding key to store latest update time. + self.update_state_db('DOCKER_STATS|LastUpdateTime', 'lastupdate', datetimeobj) + self.update_processstats_command() + self.update_state_db('PROCESS_STATS|LastUpdateTime', 'lastupdate', datetimeobj) + +# main start +def main(): + log_info("process-docker stats daemon starting up..") + if not os.getuid() == 0: + log_error("Must be root to run process-docker daemon") + print "Error: Must be root to run process-docker daemon" + sys.exit(1) + pd = ProcDockerStats() + # Data need to be updated every 2 mins. hence adding delay of 120 seconds + while True: + pd.run() + time.sleep(120) + log_info("process-docker stats daemon exited") + +if __name__ == '__main__': + main() + diff --git a/files/image_config/procdockerstatsd/procdockerstatsd.service b/files/image_config/procdockerstatsd/procdockerstatsd.service new file mode 100644 index 000000000000..4e38c350a577 --- /dev/null +++ b/files/image_config/procdockerstatsd/procdockerstatsd.service @@ -0,0 +1,13 @@ +[Unit] +Description=Process and docker CPU/memory utilization data export daemon +Requires=database.service updategraph.service +After=database.service updategraph.service + +[Service] +Type=simple +ExecStart=/usr/bin/procdockerstatsd +Restart=Always + +[Install] +WantedBy=multi-user.target + diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index f54c13697753..416075d8b33a 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -2,7 +2,7 @@ SERVICE="swss" PEER="syncd" -DEPENDENT="teamd radv" +DEPENDENT="teamd radv dhcp_relay" DEBUGLOG="/tmp/swss-syncd-debug.log" LOCKFILE="/tmp/swss-syncd-lock" diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py index 8b057e4123a2..954f450c613b 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py @@ -38,10 +38,12 @@ def __init__(self, fan_index, drawer_index = 1, psu_fan = False): self.fan_speed_set_path = "fan{}_speed_set".format(self.index) self.fan_presence_path = "fan{}_status".format(self.drawer_index) self.fan_max_speed_path = "fan{}_max".format(self.index) + self._name = "fan{}".format(fan_index + 1) else: self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index) self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) - self.fan_max_speed_path = "psu{}_max".format(self.index) + self._name = "fan(PSU{})".format(fan_index) + self.fan_max_speed_path = None self.fan_status_path = "fan{}_fault".format(self.index) self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index) self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index) @@ -49,6 +51,9 @@ def __init__(self, fan_index, drawer_index = 1, psu_fan = False): self.fan_pwm_path = "pwm1" self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index) + def get_name(self): + return self._name + def get_status(self): """ Retrieves the operational status of fan @@ -123,7 +128,11 @@ def get_speed(self): speed_in_rpm = int(fan_curr_speed.read()) except (ValueError, IOError): speed_in_rpm = 0 - + + if self.fan_max_speed_path is None: + # in case of max speed unsupported, we just return speed in unit of RPM. + return speed_in_rpm + max_speed_in_rpm = self._get_max_speed_in_rpm() speed = 100*speed_in_rpm/max_speed_in_rpm @@ -136,11 +145,10 @@ def get_target_speed(self): Returns: int: percentage of the max fan speed """ - speed = 0 - if self.is_psu_fan: # Not like system fan, psu fan speed can not be modified, so target speed is N/A - return speed + return self.get_speed() + try: with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm: pwm = int(fan_pwm.read()) @@ -243,4 +251,4 @@ def get_speed_tolerance(self): considered tolerable """ # The tolerance value is fixed as 20% for all the Mellanox platform - return 20 \ No newline at end of file + return 20 diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py index 25461986f37a..6d81ca3e7b51 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py @@ -24,6 +24,7 @@ def __init__(self): self._chassis.initialize_psu() self._chassis.initialize_fan() self._chassis.initialize_eeprom() + self._chassis.initialize_thermals() def _is_host(self): """ diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py index 0789f67e4f09..4207e3c3c26f 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py @@ -60,6 +60,7 @@ def __init__(self, psu_index, sku): psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) #psu_oper_status should always be present for all SKUs self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status) + self._name = "PSU{}".format(psu_index + 1) if sku in hwsku_dict_psu: filemap = psu_profile_list[hwsku_dict_psu[sku]] @@ -92,7 +93,10 @@ def __init__(self, psu_index, sku): fan = Fan(psu_index, psu_index, True) if fan.get_presence(): - self._fan = fan + self._fan_list.append(fan) + + def get_name(self): + return self._name def _read_generic_file(self, filename, len): """ diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py index 6862b3fb258b..0535f306cc8f 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -36,28 +36,36 @@ THERMAL_API_GET_TEMPERATURE = "get_temperature" THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" +THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD = "get_high_critical_threshold" + +THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0 HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" thermal_api_handler_cpu_core = { THERMAL_API_GET_TEMPERATURE:"cpu_core{}", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max" + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_core{}_crit" } thermal_api_handler_cpu_pack = { THERMAL_API_GET_TEMPERATURE:"cpu_pack", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max" + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_pack_crit" } thermal_api_handler_module = { THERMAL_API_GET_TEMPERATURE:"module{}_temp_input", - THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit" + THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"module{}_temp_emergency" } thermal_api_handler_psu = { THERMAL_API_GET_TEMPERATURE:"psu{}_temp", - THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max" + THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None } thermal_api_handler_gearbox = { THERMAL_API_GET_TEMPERATURE:"temp_input_gearbox{}", - THERMAL_API_GET_HIGH_THRESHOLD:None + THERMAL_API_GET_HIGH_THRESHOLD:None, + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None } thermal_ambient_apis = { THERMAL_DEV_ASIC_AMBIENT : "asic", @@ -233,6 +241,7 @@ }, ] + def initialize_thermals(sku, thermal_list, psu_list): # create thermal objects for all categories of sensors tp_index = hwsku_dict_thermal[sku] @@ -262,6 +271,8 @@ def initialize_thermals(sku, thermal_list, psu_list): thermal = Thermal(category, start + index, True) thermal_list.append(thermal) + + class Thermal(ThermalBase): def __init__(self, category, index, has_index, dependency = None, hint = None): """ @@ -280,9 +291,11 @@ def __init__(self, category, index, has_index, dependency = None, hint = None): self.category = category self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) + self.high_critical_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD) self.dependency = dependency self.dependent_hint = hint + def get_name(self): """ Retrieves the name of the device @@ -292,6 +305,7 @@ def get_name(self): """ return self.name + def _read_generic_file(self, filename, len): """ Read a generic file, returns the contents of the file @@ -304,6 +318,7 @@ def _read_generic_file(self, filename, len): logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) return result + def _get_file_from_api(self, api_name): if self.category == THERMAL_DEV_CATEGORY_AMBIENT: if api_name == THERMAL_API_GET_TEMPERATURE: @@ -315,9 +330,13 @@ def _get_file_from_api(self, api_name): if self.category in thermal_device_categories_singleton: filename = handler else: - filename = handler.format(self.index) + if handler: + filename = handler.format(self.index) + else: + return None return join(HW_MGMT_THERMAL_ROOT, filename) + def get_temperature(self): """ Retrieves current temperature reading from thermal @@ -337,8 +356,11 @@ def get_temperature(self): if value_str is None: return None value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None return value_float / 1000.0 + def get_high_threshold(self): """ Retrieves the high threshold temperature of thermal @@ -353,4 +375,25 @@ def get_high_threshold(self): if value_str is None: return None value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None + return value_float / 1000.0 + + + def get_high_critical_threshold(self): + """ + Retrieves the high critical threshold temperature of thermal + + Returns: + A float number, the high critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + if self.high_critical_threshold is None: + return None + value_str = self._read_generic_file(self.high_critical_threshold, 0) + if value_str is None: + return None + value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None return value_float / 1000.0