Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[201911][thermal control] Backport feature from master branch #4677

Merged
merged 15 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json
80 changes: 80 additions & 0 deletions device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "true",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
},
{
"type": "fan.all.good"
}
],
"actions": [
{
"type": "thermal.recover"
}
]
}
]
}
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3420-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn4700-r0/thermal_policy.json
3 changes: 2 additions & 1 deletion dockers/docker-platform-monitor/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ RUN apt-get update && \
rrdtool \
python-smbus \
ethtool \
dmidecode && \
dmidecode \
i2c-tools && \
pip install enum34

{% if docker_platform_monitor_debs.strip() -%}
Expand Down
11 changes: 11 additions & 0 deletions dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,14 @@ stdout_logfile=syslog
stderr_logfile=syslog
startsecs=10
{% endif %}

{% if not skip_thermalctld %}
[program:thermalctld]
command=/usr/bin/thermalctld
priority=9
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}
4 changes: 4 additions & 0 deletions dockers/docker-platform-monitor/start.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,7 @@ supervisorctl start psud
supervisorctl start syseepromd
{% endif %}

{% if not skip_thermalctld %}
supervisorctl start thermalctld
{% endif %}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
From 3512488c981eb81d51ce92cb3573721e36861f56 Mon Sep 17 00:00:00 2001
From: Junchao Chen <junchao@mellanox.com>
Date: Fri, 29 May 2020 10:38:53 +0300
Subject: [PATCH] Disable hw-management thermal control service

---
usr/usr/bin/hw-management.sh | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 65e5d39..0d1c4a1 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -832,7 +832,9 @@ do_start()
if [ -f $config_path/max_tachos ]; then
max_tachos=$(<$config_path/max_tachos)
fi
- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
+ # Disable hw-management thermal control because
# SONiC already implements it
+ #$THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
}

do_stop()
--
1.9.1

1 change: 1 addition & 0 deletions platform/mellanox/mlnx-platform-api.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl
$(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api
$(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2
$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE)
SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)

export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))"
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
.cache/
3 changes: 3 additions & 0 deletions platform/mellanox/mlnx-platform-api/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aliases]
test=pytest
9 changes: 9 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
maintainer_email='kevinw@mellanox.com',
packages=[
'sonic_platform',
'tests'
],
setup_requires= [
'pytest-runner'
],
tests_require = [
'pytest',
'mock>=2.0.0'
],
classifiers=[
'Development Status :: 3 - Alpha',
Expand All @@ -26,5 +34,6 @@
'Topic :: Utilities',
],
keywords='sonic SONiC platform PLATFORM',
test_suite='setup.get_test_suite'
)

Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__all__ = ["platform", "chassis"]
from sonic_platform import *
from sonic_platform import *
32 changes: 30 additions & 2 deletions platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sonic_daemon_base.daemon_base import Logger
from os import listdir
from os.path import isfile, join
from glob import glob
import sys
import io
import re
Expand All @@ -28,12 +29,17 @@
MLNX_NUM_PSU = 2

GET_HWSKU_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.hwsku"
GET_PLATFORM_CMD = "sonic-cfggen -d -v DEVICE_METADATA.localhost.platform"

EEPROM_CACHE_ROOT = '/var/cache/sonic/decode-syseeprom'
EEPROM_CACHE_FILE = 'syseeprom_cache'

HWMGMT_SYSTEM_ROOT = '/var/run/hw-management/system/'

MST_DEVICE_NAME_PATTERN = '/dev/mst/mt[0-9]*_pciconf0'
MST_DEVICE_RE_PATTERN = '/dev/mst/mt([0-9]*)_pciconf0'
SPECTRUM1_CHIP_ID = '52100'

#reboot cause related definitions
REBOOT_CAUSE_ROOT = HWMGMT_SYSTEM_ROOT

Expand All @@ -55,6 +61,7 @@ def __init__(self):

# Initialize SKU name
self.sku_name = self._get_sku_name()
self.platform_name = self._get_platform_name()
mi = get_machine_info()
if mi is not None:
self.name = mi['onie_platform']
Expand Down Expand Up @@ -93,11 +100,21 @@ def initialize_fan(self):
num_of_fan, num_of_drawer = self._extract_num_of_fans_and_fan_drawers()
multi_rotor_in_drawer = num_of_fan > num_of_drawer

# Fan's direction isn't supported on spectrum 1 devices for now
mst_dev_list = glob(MST_DEVICE_NAME_PATTERN)
if not mst_dev_list:
raise RuntimeError("Can't get chip type due to {} not found".format(MST_DEVICE_NAME_PATTERN))
m = re.search(MST_DEVICE_RE_PATTERN, mst_dev_list[0])
if m.group(1) == SPECTRUM1_CHIP_ID:
has_fan_dir = False
else:
has_fan_dir = True

for index in range(num_of_fan):
if multi_rotor_in_drawer:
fan = Fan(index, index/2)
fan = Fan(has_fan_dir, index, index/2, False, self.platform_name)
else:
fan = Fan(index, index)
fan = Fan(has_fan_dir, index, index, False, self.platform_name)
self._fan_list.append(fan)


Expand Down Expand Up @@ -230,6 +247,12 @@ def _get_sku_name(self):
return out.rstrip('\n')


def _get_platform_name(self):
    """
    Retrieves the platform name by running GET_PLATFORM_CMD

    The command is executed through the shell and only its standard
    output is captured.

    Returns:
        The command's standard output with trailing newline
        characters removed.
    """
    proc = subprocess.Popen(GET_PLATFORM_CMD, shell=True, stdout=subprocess.PIPE)
    # stderr is not piped, so only the stdout half of the result is useful
    stdout_data = proc.communicate()[0]
    return stdout_data.rstrip('\n')


def _get_port_position_tuple_by_sku_name(self):
position_tuple = port_position_tuple_list[hwsku_dict_port[self.sku_name]]
return position_tuple
Expand Down Expand Up @@ -442,3 +465,8 @@ def get_change_event(self, timeout=0):
return True, {'sfp':port_dict}
else:
return True, {'sfp':{}}

def get_thermal_manager(self):
    """
    Retrieves the thermal manager class for this chassis

    The import is done inside the method, so the thermal_manager
    module is only loaded when this method is first called.

    Returns:
        The ThermalManager class itself (not an instance).
    """
    from . import thermal_manager
    return thermal_manager.ThermalManager

Loading