Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QoS] Support dynamic headroom calculation for Barefoot platforms #6151

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
8381a27
[QoS] Support dynamic headroom calculation for Barefoot platforms
MariuszStachura Aug 11, 2022
d44f8c4
Merge branch 'master' into bfn-dhc
MariuszStachura Aug 31, 2022
6a37963
Use PG 6 in test_lossless_pg
MariuszStachura Aug 31, 2022
2611182
Remove unused variables
MariuszStachura Aug 31, 2022
e19d900
Merge branch 'master' into bfn-dhc
MariuszStachura Sep 1, 2022
d52c12e
Update tests/qos/test_buffer.py
MariuszStachura Sep 2, 2022
0b8c13c
Remove not needed if
MariuszStachura Sep 2, 2022
cbc3949
Do not disable log analyzer and get rid of not needed check
MariuszStachura Sep 2, 2022
019a14c
Merge branch 'master' into bfn-dhc
MariuszStachura Sep 16, 2022
18557ba
Merge branch 'master' of https://github.com/Azure/sonic-mgmt into bfn…
MariuszStachura Sep 19, 2022
243e638
Exclude all changes not related to dynamic buffer calculation
MariuszStachura Sep 23, 2022
bbc66f1
Merge branch 'master' into bfn-dhc
MariuszStachura Sep 23, 2022
4101f27
Merge branch 'master' into bfn-dhc
MariuszStachura Sep 23, 2022
85e768d
Change not needed
MariuszStachura Sep 23, 2022
78ed3dc
Merge branch 'master' into bfn-dhc
MariuszStachura Sep 28, 2022
1967c07
Fix pre-commit issues
MariuszStachura Oct 19, 2022
d9bc310
Merge branch 'master' into bfn-dhc
MariuszStachura Oct 19, 2022
b3a6b2a
Just some more formatting fixes after merge
MariuszStachura Oct 19, 2022
148dac8
Fix even more pre-commit checks
MariuszStachura Oct 19, 2022
fb31e77
Remove unused local variable 'asic_type'
MariuszStachura Oct 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions tests/qos/files/dynamic_buffer_param.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,39 @@
"x86_64-nvidia_sn5600-r0": "800000",
"x86_64-nvidia_sn5600_simx-r0": "800000"
}
},
"barefoot": {
"default_cable_length": ["5m", "40m", "300m"],
"testparam_cable_length": ["15m", "40m"],
"headroom-override": {
"add": {
"xon": "18432",
"xoff": "18432",
"size": "36864",
"dynamic_th": "1"
},
"set": {
"xon": "18432",
"xoff": "36864",
"size": "55296"
}
},
"lossless_pg": {
"headroom-override": {
"xon": "18432",
"xoff": "16384",
"size": "34816"
},
"non-default-dynamic_th": {
"dynamic_th": "7"
}
},
"lossy_pg": {
"default": "4096"
},
"shared-headroom-pool": {
"size": "3153920",
"private_pg_headroom": "70400"
}
}
}
230 changes: 180 additions & 50 deletions tests/qos/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest

from tests.common import config_reload
from tests.common.barefoot_data import is_barefoot_device
from tests.common.broadcom_data import is_broadcom_device
from tests.common.utilities import wait_until
from tests.common.helpers.assertions import pytest_assert, pytest_require
Expand Down Expand Up @@ -225,52 +226,54 @@ def load_test_parameters(duthost):
with open(param_file_name) as file:
params = json.load(file)
logging.info("Loaded test parameters {} from {}".format(params, param_file_name))
ASIC_TYPE = duthost.facts['asic_type']
vendor_specific_param = params[ASIC_TYPE]
DEFAULT_CABLE_LENGTH_LIST = vendor_specific_param['default_cable_length']
TESTPARAM_HEADROOM_OVERRIDE = vendor_specific_param['headroom-override']
TESTPARAM_LOSSLESS_PG = vendor_specific_param['lossless_pg']
TESTPARAM_SHARED_HEADROOM_POOL = vendor_specific_param['shared-headroom-pool']
TESTPARAM_EXTRA_OVERHEAD = vendor_specific_param['extra_overhead']
TESTPARAM_ADMIN_DOWN = vendor_specific_param['admin-down']
MAX_SPEED_8LANE_PORT = vendor_specific_param['max_speed_8lane_platform'].get(duthost.facts['platform'])

# For ingress profile list, we need to check whether the ingress lossy profile exists
ingress_lossy_pool = duthost.shell('redis-cli -n 4 keys "BUFFER_POOL|ingress_lossy_pool"')['stdout']
if ingress_lossy_pool:
ingress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE')
ingress_profile_list.append('[BUFFER_PROFILE_TABLE:ingress_lossy_zero_profile]')

# 'admin-down' section contains references to buffer profiles
# We need to convert the format of the references according to whether table name should be in the reference
if not check_qos_db_fv_reference_with_table(duthost):
expected_pgs = TESTPARAM_ADMIN_DOWN.get('BUFFER_PG_TABLE')
if expected_pgs:
new_pgs = {}
for pg, profile in expected_pgs.items():
new_pgs[pg] = profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '')
TESTPARAM_ADMIN_DOWN['BUFFER_PG_TABLE'] = new_pgs

expected_queues = TESTPARAM_ADMIN_DOWN.get('BUFFER_QUEUE_TABLE')
if expected_queues:
new_queues = {}
for queue, profile in expected_queues.items():
new_queues[queue] = profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '')
TESTPARAM_ADMIN_DOWN['BUFFER_QUEUE_TABLE'] = new_queues

expected_ingress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE')
if expected_ingress_profile_list:
new_list = []
for profile in expected_ingress_profile_list:
new_list.append(profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', ''))
TESTPARAM_ADMIN_DOWN['BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE'] = new_list

expected_egress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_EGRESS_PROFILE_LIST_TABLE')
if expected_egress_profile_list:
new_list = []
for profile in expected_egress_profile_list:
new_list.append(profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', ''))
TESTPARAM_ADMIN_DOWN['BUFFER_PORT_EGRESS_PROFILE_LIST_TABLE'] = new_list
ASIC_TYPE = duthost.facts.get('asic_type')
vendor_specific_param = params.get(ASIC_TYPE)
DEFAULT_CABLE_LENGTH_LIST = vendor_specific_param.get('default_cable_length')
TESTPARAM_HEADROOM_OVERRIDE = vendor_specific_param.get('headroom-override')
TESTPARAM_LOSSLESS_PG = vendor_specific_param.get('lossless_pg')
TESTPARAM_SHARED_HEADROOM_POOL = vendor_specific_param.get('shared-headroom-pool')
TESTPARAM_EXTRA_OVERHEAD = vendor_specific_param.get('extra_overhead')
TESTPARAM_ADMIN_DOWN = vendor_specific_param.get('admin-down')
if 'max_speed_8lane_platform' in vendor_specific_param:
MAX_SPEED_8LANE_PORT = vendor_specific_param['max_speed_8lane_platform'].get(duthost.facts['platform'])

if TESTPARAM_ADMIN_DOWN is not None:
# For ingress profile list, we need to check whether the ingress lossy profile exists
ingress_lossy_pool = duthost.shell('redis-cli -n 4 keys "BUFFER_POOL|ingress_lossy_pool"')['stdout']
if ingress_lossy_pool:
ingress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE')
ingress_profile_list.append('[BUFFER_PROFILE_TABLE:ingress_lossy_zero_profile]')

# 'admin-down' section contains references to buffer profiles
# We need to convert the format of the references according to whether table name should be in the reference
if not check_qos_db_fv_reference_with_table(duthost):
expected_pgs = TESTPARAM_ADMIN_DOWN.get('BUFFER_PG_TABLE')
if expected_pgs:
new_pgs = {}
for pg, profile in expected_pgs.items():
new_pgs[pg] = profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '')
TESTPARAM_ADMIN_DOWN['BUFFER_PG_TABLE'] = new_pgs

expected_queues = TESTPARAM_ADMIN_DOWN.get('BUFFER_QUEUE_TABLE')
if expected_queues:
new_queues = {}
for queue, profile in expected_queues.items():
new_queues[queue] = profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', '')
TESTPARAM_ADMIN_DOWN['BUFFER_QUEUE_TABLE'] = new_queues

expected_ingress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE')
if expected_ingress_profile_list:
new_list = []
for profile in expected_ingress_profile_list:
new_list.append(profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', ''))
TESTPARAM_ADMIN_DOWN['BUFFER_PORT_INGRESS_PROFILE_LIST_TABLE'] = new_list

expected_egress_profile_list = TESTPARAM_ADMIN_DOWN.get('BUFFER_PORT_EGRESS_PROFILE_LIST_TABLE')
if expected_egress_profile_list:
new_list = []
for profile in expected_egress_profile_list:
new_list.append(profile.replace('[BUFFER_PROFILE_TABLE:', '').replace(']', ''))
TESTPARAM_ADMIN_DOWN['BUFFER_PORT_EGRESS_PROFILE_LIST_TABLE'] = new_list


def configure_shared_headroom_pool(duthost, enable):
Expand Down Expand Up @@ -300,7 +303,7 @@ def setup_module(duthosts, rand_one_dut_hostname, request):

duthost = duthosts[rand_one_dut_hostname]
detect_buffer_model(duthost)
if not is_mellanox_device(duthost) and not is_innovium_device(duthost):
if not is_mellanox_device(duthost) and not is_innovium_device(duthost) and not is_barefoot_device(duthost):
load_lossless_headroom_data(duthost)
yield
return
Expand Down Expand Up @@ -516,6 +519,10 @@ def _fetch_size_difference_for_8lane_ports(duthost, conn_graph_facts):

logging.debug("Expected pool {}, expec shp {}, curr_shp {} default ovs {}".format(expected_pool_size, expected_shp_size, curr_shp_size, DEFAULT_OVER_SUBSCRIBE_RATIO))

elif duthost.facts['asic_type'] == 'barefoot':
expected_pool_size = 43067728
expected_shp_size = 3153920

pytest_assert(ensure_pool_size(duthost, 20, expected_pool_size, expected_shp_size, ingress_lossless_pool_oid),
"Pool size isn't correct in database: expected pool {} shp {}, size in APPL_DB pool {} shp {}, size in ASIC_DB {}".format(
expected_pool_size,
Expand Down Expand Up @@ -918,7 +925,7 @@ def test_change_speed_cable(duthosts, rand_one_dut_hostname, conn_graph_facts, p
if speed_to_test == original_speed and cable_len_to_test == original_cable_len:
pytest.skip('Speed, MTU and cable length matches the default value, nothing to test, skip')
expected_profile = make_expected_profile_name(speed_to_test, cable_len_to_test)
if duthost.shell('redis-cli hget BUFFER_PROFILE_TABLE:{}'.format(expected_profile))['stdout']:
if duthost.shell('redis-cli keys BUFFER_PROFILE_TABLE:{}'.format(expected_profile))['stdout'] != '':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this changed to 'keys'?

Copy link
Author

@MariuszStachura MariuszStachura Oct 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was failing for me and I thought it must have been a mistake:

admin@sonic:~$ redis-cli keys BUFFER_PROFILE_TABLE:expected_profile
(empty array)
admin@sonic:~$ redis-cli hget BUFFER_PROFILE_TABLE:expected_profile
(error) ERR wrong number of arguments for 'hget' command

hget always failed

pytest.skip('The buffer profile has existed, most of the checks can not be performed, skip')

try:
Expand Down Expand Up @@ -1316,6 +1323,9 @@ def test_shared_headroom_pool_configure(duthosts, rand_one_dut_hostname, conn_gr

duthost = duthosts[rand_one_dut_hostname]

if duthost.facts['asic_type'] == 'barefoot':
pytest.skip('Oversubscribe ratio is not supported for Barefoot platforms yet')

pool_size_before_shp = duthost.shell('redis-cli hget BUFFER_POOL_TABLE:ingress_lossless_pool size')['stdout']
shp_size_before_shp = duthost.shell('redis-cli hget BUFFER_POOL_TABLE:ingress_lossless_pool xoff')['stdout']

Expand Down Expand Up @@ -1521,7 +1531,10 @@ def test_lossless_pg(duthosts, rand_one_dut_hostname, conn_graph_facts, port_to_
# Update it to non-default dynamic_th
logging.info('[Testcase: headroom override => dynamically calculated headroom with non-default dynamic_th]')
duthost.shell(set_command + 'non-default-dynamic_th')
expected_nondef_profile = make_expected_profile_name(original_speed, '15m', other_factors=['th2'])
if is_barefoot_device(duthost):
expected_nondef_profile = make_expected_profile_name(original_speed, '15m', other_factors=['th7'])
else:
expected_nondef_profile = make_expected_profile_name(original_speed, '15m', other_factors=['th2'])
check_pg_profile(duthost, buffer_pg, expected_nondef_profile)
# A new profile should be created in ASIC DB
profile_oid, _ = check_buffer_profile_details(duthost, initial_asic_db_profiles, expected_nondef_profile, None, pool_oid, port_to_test)
Expand Down Expand Up @@ -1692,9 +1705,11 @@ def _check_buffer_object_list_aligns_with_expected_ones(port_to_test, table, exp

skip_traditional_model()

if not TESTPARAM_HEADROOM_OVERRIDE:
pytest.skip('Shutdown port test skipped due to no headroom override parameter defined')
param = TESTPARAM_HEADROOM_OVERRIDE.get("add")
if not param:
pytest.skip('Shutdown port test skipped due to no headroom override parameters defined')
pytest.skip('Shutdown port test skipped due to no headroom override parameters for add operation defined')

duthost = duthosts[rand_one_dut_hostname]
is_qos_db_reference_with_table = check_qos_db_fv_reference_with_table(duthost)
Expand All @@ -1712,6 +1727,8 @@ def _check_buffer_object_list_aligns_with_expected_ones(port_to_test, table, exp

new_cable_len = '15m'

if not TESTPARAM_EXTRA_OVERHEAD:
pytest.skip('Shutdown port test skipped due to no extra overhead parameter defined')
extra_overhead = TESTPARAM_EXTRA_OVERHEAD.get(str(len(list_of_lanes)))
if not extra_overhead:
extra_overhead = TESTPARAM_EXTRA_OVERHEAD.get('default')
Expand Down Expand Up @@ -2411,6 +2428,9 @@ def _check_port_buffer_info_and_return(duthost, table, ids, port, expected_profi
buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-7', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]')
else:
buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]')
if is_barefoot_device(duthost):
buffer_table_up[KEY_2_LOSSLESS_QUEUE][1] = ('BUFFER_QUEUE_TABLE', '0-2', '[BUFFER_PROFILE_TABLE:]')
buffer_table_up[KEY_2_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5-6', '[BUFFER_PROFILE_TABLE:]')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The profile name is missing here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was intentional for BUFFER_QUEUE_TABLE in our case. Thank you for the thorough review; I'll try to fix the pre-commit issues locally first so that the jobs are not triggered again and again.


buffer_table_up[KEY_4_LOSSLESS_QUEUE][1] = ('BUFFER_QUEUE_TABLE', '0-1', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]')
buffer_table_up[KEY_4_LOSSLESS_QUEUE][3] = ('BUFFER_QUEUE_TABLE', '5', '[BUFFER_PROFILE_TABLE:egress_lossy_profile]')
Expand Down Expand Up @@ -2578,6 +2598,8 @@ def calculate_headroom_data(duthost, port_to_test):
"""
if ASIC_TYPE == 'mellanox':
return mellanox_calculate_headroom_data(duthost, port_to_test)
elif ASIC_TYPE == 'barefoot':
return barefoot_calculate_headroom_data(duthost, port_to_test)
else:
return False, None

Expand Down Expand Up @@ -2754,3 +2776,111 @@ def mellanox_calculate_headroom_data(duthost, port_to_test):
head_room_data['xon'] = int(xon_value)
head_room_data['xoff'] = int(xoff_value)
return True, head_room_data

def barefoot_calculate_headroom_data(duthost, port_to_test):
    """
    Calculate the expected lossless headroom (size, xon, xoff) of a port on Barefoot platforms.

    It is the Python implementation of the buffer_headroom_barefoot.lua script, which can be found here:
    https://github.com/Azure/sonic-swss/blob/master/cfgmgr/buffer_headroom_barefoot.lua

    Args:
        duthost: The DUT host object. Only its shell() method is used, to run redis-cli queries.
        port_to_test: Name of the port the headroom is calculated for, e.g. "Ethernet0".

    Returns:
        A tuple (True, head_room_data) on success, where head_room_data is a dict holding the
        integer entries 'size', 'xon' and 'xoff'.
        (False, None) if the speed, the MTU, the cable length or the default peer response time
        can not be fetched from the database.
    """
    global ASIC_TABLE_KEYS_LOADED
    global CELL_SIZE
    global PIPELINE_LATENCY
    global MAC_PHY_DELAY

    global LOSSLESS_TRAFFIC_PATTERN_KEYS_LOADED
    global LOSSLESS_MTU
    global SMALL_PACKET_PERCENTAGE

    # Pause quanta per port speed. A speed that is not listed here falls back to
    # the default peer response time stored in the ASIC table of STATE_DB.
    pause_quanta_per_speed_dict = {
        400000: 905,
        200000: 453,
        100000: 394,
        50000: 147,
        40000: 118,
        25000: 80,
        10000: 67,
        1000: 2,
        100: 1,
    }

    # Get the port speed from config DB. With autoneg enabled, the highest
    # advertised (or, failing that, supported) speed is taken.
    port_info = _compose_dict_from_cli(
        duthost.shell('redis-cli -n 4 hgetall "PORT|{}"'.format(port_to_test))['stdout'].split('\n'))
    if port_info.get('autoneg') == 'on':
        adv_speeds = port_info.get('adv_speeds')
        if adv_speeds and adv_speeds != 'all':
            available_speeds = adv_speeds
        else:
            available_speeds = duthost.shell(
                'redis-cli -n 6 hget "PORT_TABLE|{}" "supported_speeds"'.format(port_to_test))['stdout']
        port_speed_raw = natsorted(available_speeds.split(','))[-1]
    else:
        port_speed_raw = port_info.get('speed')
    if not port_speed_raw:
        logging.error("failed to get speed from config db for port {}".format(port_to_test))
        return False, None
    port_speed = int(port_speed_raw)

    # Derive the peer response time from the pause quanta of the port speed
    pause_quanta = pause_quanta_per_speed_dict.get(port_speed)
    if pause_quanta is not None:
        peer_response_time = float(pause_quanta) * 512 / (1024 * 8)
    else:
        # Get default peer response time from State DB
        # Command: redis-cli -n 6 hget "ASIC_TABLE|BAREFOOT-TOFINO-2" "peer_response_time"
        # NOTE(review): assumes exactly one key matches ASIC_TABLE* - confirm
        asic_table_key = duthost.shell('redis-cli -n 6 keys ASIC_TABLE*')['stdout']
        peer_response_time_raw = duthost.shell(
            'redis-cli -n 6 hget "{}" "peer_response_time"'.format(asic_table_key))['stdout']
        if not peer_response_time_raw:
            # Report the failure the same way as the other lookups instead of
            # letting float('') raise a ValueError
            logging.error("failed to get default peer response time from state db for port {}".format(port_to_test))
            return False, None
        peer_response_time = float(peer_response_time_raw)

    # Get port mtu from config DB
    # Command: redis-cli -n 4 hget "PORT|Ethernet0" "mtu"
    port_mtu_raw = duthost.shell('redis-cli -n 4 hget "PORT|{}" "mtu"'.format(port_to_test))['stdout']
    if not port_mtu_raw:
        logging.error("failed to get MTU from config db for port {}".format(port_to_test))
        return False, None
    port_mtu = int(port_mtu_raw)

    # Get cable length from config DB
    # Command: redis-cli -n 4 hget "CABLE_LENGTH|AZURE" 'Ethernet0'
    # NOTE(review): assumes exactly one key matches *CABLE_LENGTH* - confirm
    cable_length_keys = duthost.shell('redis-cli -n 4 keys *CABLE_LENGTH*')['stdout']
    cable_length_raw = duthost.shell(
        'redis-cli -n 4 hget "{}" "{}"'.format(cable_length_keys, port_to_test))['stdout']
    if not (cable_length_raw and cable_length_raw.endswith('m')):
        logging.error("failed to get a valid cable length from config db for port {}".format(port_to_test))
        return False, None
    # Strip the trailing 'm' unit, e.g. "40m" -> 40.0
    cable_length = float(cable_length_raw[:-1])

    logging.info('port_speed = {}, port_mtu = {}, cable_length = {}'.format(port_speed, port_mtu, cable_length))

    # The ASIC and traffic pattern parameters are fetched only while the
    # corresponding "loaded" flag is still unset
    if not ASIC_TABLE_KEYS_LOADED:
        CELL_SIZE, PIPELINE_LATENCY, MAC_PHY_DELAY = get_asic_table_data_from_db(duthost)

    if not LOSSLESS_TRAFFIC_PATTERN_KEYS_LOADED:
        LOSSLESS_MTU, SMALL_PACKET_PERCENTAGE = get_lossless_traffic_pattern_data_from_db(duthost)

    speed_of_light = 198000000
    minimal_packet_size = 64

    # Divide in floating point explicitly so that the factor is not truncated
    # by Python 2 integer division should CELL_SIZE be an int
    if CELL_SIZE > 2 * minimal_packet_size:
        worst_case_factor = float(CELL_SIZE) / minimal_packet_size
    else:
        worst_case_factor = (2.0 * CELL_SIZE) / (1 + CELL_SIZE)

    cell_occupancy = (100 - SMALL_PACKET_PERCENTAGE + SMALL_PACKET_PERCENTAGE * worst_case_factor) / 100.0

    if port_speed == 400000:
        peer_response_time *= 2
    bytes_on_cable = 2 * cable_length * port_speed * 1000000000 / speed_of_light / (8 * 1024)
    propagation_delay = port_mtu + bytes_on_cable + MAC_PHY_DELAY + peer_response_time * 1024

    # Calculate the xoff and xon and then round up at 1024 bytes
    xoff_value = LOSSLESS_MTU + propagation_delay * cell_occupancy
    xoff_value = math.ceil(xoff_value / 1024.0) * 1024
    xon_value = math.ceil(PIPELINE_LATENCY / 1024.0) * 1024

    # The headroom size equals the rounded xon, which is already a multiple of
    # 1024, so it needs no further rounding.
    # NOTE(review): xoff is not added to the size here - confirm against the Lua script
    headroom_size = xon_value

    head_room_data = {
        'size': int(headroom_size),
        'xon': int(xon_value),
        'xoff': int(xoff_value),
    }
    return True, head_room_data