[Dynamic Buffer Calc] Enhance the logic to check maximum headroom exceeding to cover corner scenarios (#2763)

What I did
Enhance the logic to check maximum headroom exceeding to cover corner scenarios

Currently, the logic that checks whether the maximum headroom is exceeded works well when a user changes any buffer configuration in the dynamic buffer model, preventing all problematic configurations from being applied to the ASIC.
However, it can fail when a problematic configuration is already in config_db.json and a config reload is executed. To cover this scenario, the following actions need to be taken (a sketch of the resulting calculation follows the list):

- Take the pending PG keys and buffer profiles into account when calculating the maximum headroom.
  Existing buffer PGs and buffer profiles can sit in the pending queue during system initialization, because the large number of notifications to be handled takes time.
- Take lossy PGs into account when calculating the maximum headroom.
  A non-default lossy PG can be added by the user in config_db.json.
- Pass the PG to the Lua plugin when refreshing the PGs for a port.
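Below is a minimal Python sketch of the enhanced calculation, mirroring what the Lua plugin does against APPL_DB. It is illustrative only: the flat dict arguments, function names, and defaults are assumptions standing in for the real BUFFER_PG_TABLE, pending _BUFFER_PG_TABLE, and BUFFER_PROFILE_TABLE lookups.

```python
def number_of_pgs(pg_range):
    # A PG key names a range: '3-4' covers two PGs, '6' covers one.
    lo, _, hi = pg_range.partition('-')
    return int(hi or lo) - int(lo) + 1

def headroom_within_limit(applied_pgs, pending_pgs, profile_sizes,
                          max_headroom_size, lossy_pg_size,
                          new_pg=None, input_profile=None, input_size=0,
                          overhead=0):
    # Applied entries (BUFFER_PG_TABLE) first; pending entries
    # (_BUFFER_PG_TABLE) override them, since they are newer.
    all_pgs = dict(applied_pgs)
    all_pgs.update(pending_pgs)
    # Include the PG currently being added, passed in by buffermgrd.
    if new_pg is not None:
        all_pgs[new_pg] = input_profile
    accumulative = overhead
    for pg, profile in all_pgs.items():
        if profile == input_profile:
            size = input_size
        else:
            size = profile_sizes[profile]  # applied or still-pending profile
        if size == 0:
            # Lossy PGs (size 0) still consume headroom.
            size = lossy_pg_size
        accumulative += size * number_of_pgs(pg)
    return accumulative <= max_headroom_size
```

For example, with `max_headroom_size=122880`, a pending `'3-4': 'test'` entry referencing a 29696-byte profile counts as 2 × 29696 bytes even before orchagent has applied it.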
Signed-off-by: Stephen Sun <stephens@nvidia.com>

Why I did it
Cover corner scenarios.

How I verified it
Manual test, vs test, and regression test (dynamic buffer).
stephenxs authored and StormLiangMS committed May 25, 2023
1 parent 67552e8 commit dbda972
Showing 3 changed files with 119 additions and 34 deletions.
68 changes: 38 additions & 30 deletions cfgmgr/buffer_check_headroom_mellanox.lua
@@ -5,7 +5,7 @@

local port = KEYS[1]
local input_profile_name = ARGV[1]
local input_profile_size = ARGV[2]
local input_profile_size = tonumber(ARGV[2])
local new_pg = ARGV[3]

local function is_port_with_8lanes(lanes)
@@ -60,7 +60,8 @@ if is_port_with_8lanes(lanes) then
    pipeline_latency = pipeline_latency * 2 - 1
    egress_mirror_size = egress_mirror_size * 2
end
accumulative_size = accumulative_size + 2 * pipeline_latency * 1024 + egress_mirror_size
local lossy_pg_size = pipeline_latency * 1024
accumulative_size = accumulative_size + lossy_pg_size + egress_mirror_size

-- Fetch all keys in BUFFER_PG according to the port
redis.call('SELECT', appl_db)
@@ -81,41 +81,48 @@ local function get_number_of_pgs(keyname)
    return size
end

local no_input_pg = true
if new_pg ~= nil then
    if get_number_of_pgs(new_pg) ~= 0 then
        no_input_pg = false
        new_pg = 'BUFFER_PG_TABLE:' .. new_pg
    end
-- Fetch all the PGs in APPL_DB, and store them into a hash table
local pg_keys = redis.call('KEYS', 'BUFFER_PG_TABLE:' .. port .. ':*')
local all_pgs = {}
for i = 1, #pg_keys do
    local profile = redis.call('HGET', pg_keys[i], 'profile')
    all_pgs[pg_keys[i]] = profile
end

-- Fetch all the pending PGs, and store them into the hash table
-- Overwrite any existing entries
local pending_pg_keys = redis.call('KEYS', '_BUFFER_PG_TABLE:' .. port .. ':*')
for i = 1, #pending_pg_keys do
    local profile = redis.call('HGET', pending_pg_keys[i], 'profile')
    -- Remove the leading underscore when storing it into the hash table
    all_pgs[string.sub(pending_pg_keys[i], 2, -1)] = profile
    table.insert(debuginfo, 'debug:pending entry: ' .. pending_pg_keys[i] .. ':' .. profile)
end

if new_pg ~= nil and get_number_of_pgs(new_pg) ~= 0 then
    all_pgs['BUFFER_PG_TABLE:' .. new_pg] = input_profile_name
end

-- Fetch all the PGs, accumulate the sizes
-- Handle all the PGs, accumulate the sizes
-- Assume there is only one lossless profile configured among all PGs on each port
table.insert(debuginfo, 'debug:other overhead:' .. accumulative_size)
local pg_keys = redis.call('KEYS', 'BUFFER_PG_TABLE:' .. port .. ':*')
for i = 1, #pg_keys do
    local profile = redis.call('HGET', pg_keys[i], 'profile')
for pg_key, profile in pairs(all_pgs) do
    local current_profile_size
    if profile ~= 'ingress_lossy_profile' and (no_input_pg or new_pg ~= pg_keys[i]) then
    if profile ~= input_profile_name and not no_input_pg then
        local referenced_profile = redis.call('HGETALL', 'BUFFER_PROFILE_TABLE:' .. profile)
        for j = 1, #referenced_profile, 2 do
            if referenced_profile[j] == 'size' then
                current_profile_size = tonumber(referenced_profile[j+1])
            end
        end
    else
        current_profile_size = input_profile_size
        profile = input_profile_name
    if profile ~= input_profile_name then
        local referenced_profile_size = redis.call('HGET', 'BUFFER_PROFILE_TABLE:' .. profile, 'size')
        if not referenced_profile_size then
            referenced_profile_size = redis.call('HGET', '_BUFFER_PROFILE_TABLE:' .. profile, 'size')
            table.insert(debuginfo, 'debug:pending profile: ' .. profile)
        end
    accumulative_size = accumulative_size + current_profile_size * get_number_of_pgs(pg_keys[i])
    table.insert(debuginfo, 'debug:' .. pg_keys[i] .. ':' .. profile .. ':' .. current_profile_size .. ':' .. get_number_of_pgs(pg_keys[i]) .. ':accu:' .. accumulative_size)
        current_profile_size = tonumber(referenced_profile_size)
    else
        current_profile_size = input_profile_size
    end
end

if not no_input_pg then
    accumulative_size = accumulative_size + input_profile_size * get_number_of_pgs(new_pg)
    table.insert(debuginfo, 'debug:' .. new_pg .. '*:' .. input_profile_name .. ':' .. input_profile_size .. ':' .. get_number_of_pgs(new_pg) .. ':accu:' .. accumulative_size)
    if current_profile_size == 0 then
        current_profile_size = lossy_pg_size
    end
    accumulative_size = accumulative_size + current_profile_size * get_number_of_pgs(pg_key)
    table.insert(debuginfo, 'debug:' .. pg_key .. ':' .. profile .. ':' .. current_profile_size .. ':' .. get_number_of_pgs(pg_key) .. ':accu:' .. accumulative_size)
end

if max_headroom_size > accumulative_size then
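For reference, the plugin receives the port in KEYS[1], and the profile name, profile size, and the PG being added in ARGV — the third argument being what this commit starts passing. A rough redis-py sketch of invoking it by hand (the script path, port, and argument values here are hypothetical):

```python
import redis

# The plugin itself SELECTs the DBs it needs (APPL_DB, STATE_DB, ...).
r = redis.Redis(decode_responses=True)

with open('cfgmgr/buffer_check_headroom_mellanox.lua') as f:
    check_headroom = r.register_script(f.read())

# KEYS[1] = port; ARGV[1] = profile name, ARGV[2] = profile size,
# ARGV[3] = PG being added.
result = check_headroom(keys=['Ethernet0'],
                        args=['test', '29696', 'Ethernet0:3-4'])
print(result)  # includes the 'debug:...' entries built up above
```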
13 changes: 9 additions & 4 deletions cfgmgr/buffermgrdyn.cpp
@@ -1051,10 +1051,10 @@ bool BufferMgrDynamic::isHeadroomResourceValid(const string &port, const buffer_
    // profile: the profile referenced by the new_pg (if provided) or all PGs
    // new_pg: which pg is newly added?

    if (!profile.lossless)
    if (!profile.lossless && new_pg.empty())
    {
        SWSS_LOG_INFO("No need to check headroom for lossy PG port %s profile %s size %s pg %s",
                      port.c_str(), profile.name.c_str(), profile.size.c_str(), new_pg.c_str());
        SWSS_LOG_INFO("No need to check headroom for lossy PG port %s profile %s size %s without a PG specified",
                      port.c_str(), profile.name.c_str(), profile.size.c_str());
        return true;
    }

@@ -1497,7 +1497,7 @@ task_process_status BufferMgrDynamic::refreshPgsForPort(const string &port, cons

    // Calculate whether accumulative headroom size exceeds the maximum value
    // Abort if it does
    if (!isHeadroomResourceValid(port, m_bufferProfileLookup[newProfile], exactly_matched_key))
    if (!isHeadroomResourceValid(port, m_bufferProfileLookup[newProfile], key))
    {
        SWSS_LOG_ERROR("Update speed (%s) and cable length (%s) for port %s failed, accumulative headroom size exceeds the limit",
                       speed.c_str(), cable_length.c_str(), port.c_str());
@@ -3005,6 +3005,11 @@ task_process_status BufferMgrDynamic::handleSingleBufferPgEntry(const string &ke
            bufferPg.dynamic_calculated = profileRef.dynamic_calculated;
            bufferPg.configured_profile_name = profileName;
            bufferPg.lossless = profileRef.lossless;
            if (!profileRef.lossless && !isHeadroomResourceValid(port, profileRef, key))
            {
                SWSS_LOG_ERROR("Unable to configure lossy PG %s, accumulative headroom size exceeds the limit", key.c_str());
                return task_process_status::task_failed;
            }
        }
        bufferPg.static_configured = true;
        bufferPg.configured_profile_name = profileName;
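The check added to handleSingleBufferPgEntry above closes the lossy-PG corner case: a non-default lossy PG that a user adds in config_db.json now goes through the same headroom validation. In terms of the vs-test helpers used below, the guarded operation looks like this (the port and PG values are the ones the test happens to use):

```python
# With the fix, buffermgrd rejects this statically configured lossy PG
# when the accumulative headroom on Ethernet0 would exceed the port's
# maximum headroom, instead of letting it reach the ASIC.
config_db.update_entry('BUFFER_PG', 'Ethernet0|1',
                       {'profile': 'ingress_lossy_profile'})
```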
72 changes: 72 additions & 0 deletions tests/test_buffer_dynamic.py
@@ -775,8 +775,80 @@ def test_removeBufferPool(self, dvs, testlog):
    def test_bufferPortMaxParameter(self, dvs, testlog):
        self.setup_db(dvs)

        # Update log level so that we can analyze the log in case the test failed
        logfvs = self.config_db.wait_for_entry("LOGGER", "buffermgrd")
        old_log_level = logfvs.get("LOGLEVEL")
        logfvs["LOGLEVEL"] = "INFO"
        self.config_db.update_entry("LOGGER", "buffermgrd", logfvs)

        # Check whether port's maximum parameter has been exposed to STATE_DB
        fvs = self.state_db.wait_for_entry("BUFFER_MAX_PARAM_TABLE", "Ethernet0")
        assert int(fvs["max_queues"]) and int(fvs["max_priority_groups"])

        _, oa_pid = dvs.runcmd("pgrep orchagent")

        try:
            fvs["max_headroom_size"] = "122880"
            self.state_db.update_entry("BUFFER_MAX_PARAM_TABLE", "Ethernet0", fvs)

            # Startup interface
            dvs.port_admin_set('Ethernet0', 'up')
            # Wait for the lossy profile to be handled
            self.app_db.wait_for_field_match("BUFFER_PG_TABLE", "Ethernet0:0", {"profile": "ingress_lossy_profile"})

            # Stop orchagent to simulate the scenario that the system is during initialization
            dvs.runcmd("kill -s SIGSTOP {}".format(oa_pid))

            # Create a lossless profile
            profile_fvs = {'xon': '19456',
                           'xoff': '10240',
                           'size': '29696',
                           'dynamic_th': '0',
                           'pool': 'ingress_lossless_pool'}
            self.config_db.update_entry('BUFFER_PROFILE', 'test', profile_fvs)

            self.config_db.update_entry('BUFFER_PG', 'Ethernet0|3-4', {'profile': 'test'})

            # Make sure the entry has been handled by buffermgrd and is pending on orchagent's queue
            self.app_db.wait_for_field_match("_BUFFER_PG_TABLE", "Ethernet0:3-4", {"profile": "test"})

            # Should not be added due to the maximum headroom exceeded
            self.config_db.update_entry('BUFFER_PG', 'Ethernet0|1', {'profile': 'ingress_lossy_profile'})
            # Should not be added due to the maximum headroom exceeded
            self.config_db.update_entry('BUFFER_PG', 'Ethernet0|6', {'profile': 'test'})

            # Resume orchagent
            dvs.runcmd("kill -s SIGCONT {}".format(oa_pid))

            # Check whether BUFFER_PG_TABLE is updated as expected
            self.app_db.wait_for_field_match("BUFFER_PG_TABLE", "Ethernet0:3-4", {"profile": "test"})

            keys = self.app_db.get_keys('BUFFER_PG_TABLE')

            assert 'Ethernet0:1' not in keys
            assert 'Ethernet0:6' not in keys

            # Update the profile
            profile_fvs['size'] = '28672'
            profile_fvs['xoff'] = '9216'
            self.config_db.update_entry('BUFFER_PROFILE', 'test', profile_fvs)
            self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', 'test', profile_fvs)
        finally:
            dvs.runcmd("kill -s SIGCONT {}".format(oa_pid))

            self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|3-4')
            self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|1')
            self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|6')
            self.config_db.delete_entry('BUFFER_PROFILE', 'test')

            fvs.pop("max_headroom_size")
            self.state_db.delete_entry("BUFFER_MAX_PARAM_TABLE", "Ethernet0")
            self.state_db.update_entry("BUFFER_MAX_PARAM_TABLE", "Ethernet0", fvs)

            if old_log_level:
                logfvs["LOGLEVEL"] = old_log_level
                self.config_db.update_entry("LOGGER", "buffermgrd", logfvs)

            dvs.port_admin_set('Ethernet0', 'down')

        self.cleanup_db(dvs)
