Commit

Support shared headroom pool on top of dynamic buffer calculation (#1581)

* Support shared headroom pool on top of dynamic buffer calculation

 - The feature is enabled/disabled on the fly by configuring the over-subscribe ratio and/or the shared headroom pool size.
   If both are configured, the shared headroom pool size takes effect.
   When the feature is turned on or off, all the lossless profiles and the buffer pool sizes are recalculated.
 - Support calculating the shared headroom pool size while the ingress lossless pool is statically configured.
- Check the accumulative headroom before toggling the SHP state.
  Disabling SHP causes the size of each PG to increase,
  so we need to check whether the accumulative headroom exceeds the limit (see the sketch after this list).
- Split the function doUpdateStaticProfileTask into two functions.
  Originally it was called for static profiles only and consisted of two parts:
  - One handles dynamic_th updates: it iterates over all the buffer profiles
    dynamically generated according to the dynamic_th and updates them.
  - The other handles size updates: it iterates over each port referencing
    the profile and checks whether the accumulative headroom exceeds the limit.
  Now that it is also called for the shared headroom pool, split it into
  two functions to make the code clearer.
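
A minimal sketch of the idea behind the accumulative-headroom check mentioned above, using hypothetical port/PG structures; the actual check lives in the dynamic buffer manager code, which is part of this commit but not shown in the two Lua files below:

-- Hypothetical sketch: with SHP disabled, every PG again reserves xon + xoff instead
-- of xon only, so the per-port accumulative headroom must still fit within the limit.
-- 'ports', 'pgs' and 'max_headroom' are illustrative names, not the real data model.
local function can_disable_shp(ports)
    for _, port in ipairs(ports) do
        local accumulated = 0
        for _, pg in ipairs(port.pgs) do
            accumulated = accumulated + pg.xon + pg.xoff
        end
        if accumulated > port.max_headroom then
            return false, port.name
        end
    end
    return true
end

-- Example (made-up numbers): one port, one lossless PG, fits within the limit
print(can_disable_shp({{name = "Ethernet0", max_headroom = 131072,
                        pgs = {{xon = 19456, xoff = 96256}}}}))  -- true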

Signed-off-by: Stephen Sun <stephens@nvidia.com>

How I verified it
Run the vs (virtual switch) test and the regression test.
stephenxs authored Feb 7, 2021
1 parent 1438a70 commit 5fa2329
Showing 5 changed files with 465 additions and 37 deletions.
22 changes: 19 additions & 3 deletions cfgmgr/buffer_headroom_mellanox.lua
@@ -16,6 +16,7 @@

local lossless_mtu
local small_packet_percentage
local over_subscribe_ratio = 0
local cell_size
local pipeline_latency
local mac_phy_delay
@@ -72,8 +73,19 @@ for i = 1, #lossless_traffic_table_content, 2 do
end
end

-- Fetch DEFAULT_LOSSLESS_BUFFER_PARAMETER from CONFIG_DB
local lossless_traffic_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
-- Fetch over subscribe ratio
local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio'))

-- Fetch the shared headroom pool size
local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff'))

local shp_enabled
if shp_size ~= nil and shp_size ~= 0 or over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then
shp_enabled = true
else
shp_enabled = false
end

-- Calculate the headroom information
local speed_of_light = 198000000
@@ -119,7 +131,11 @@ xoff_value = math.ceil(xoff_value / 1024) * 1024
xon_value = pipeline_latency
xon_value = math.ceil(xon_value / 1024) * 1024

headroom_size = xoff_value + xon_value + speed_overhead
if shp_enabled then
headroom_size = xon_value
else
headroom_size = xoff_value + xon_value + speed_overhead
end
headroom_size = math.ceil(headroom_size / 1024) * 1024

table.insert(ret, "xon" .. ":" .. math.ceil(xon_value))
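
For clarity, a standalone sketch of the per-PG headroom sizing implemented by the hunk above: with SHP enabled only xon is reserved per PG (xoff is drawn from the shared pool); otherwise the full xoff + xon + speed overhead is reserved. The numbers in the example are illustrative only.

-- Sketch of the headroom sizing above; xon_value, xoff_value and speed_overhead are
-- assumed to have been computed earlier in the script (example values are made up).
local function headroom_size(shp_enabled, xon_value, xoff_value, speed_overhead)
    local size
    if shp_enabled then
        -- xoff is served from the shared headroom pool, so only xon is reserved per PG
        size = xon_value
    else
        size = xoff_value + xon_value + speed_overhead
    end
    return math.ceil(size / 1024) * 1024   -- align to 1 KB, as the script does
end

print(headroom_size(false, 19456, 96256, 0))  -- 115712
print(headroom_size(true,  19456, 96256, 0))  -- 19456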
57 changes: 54 additions & 3 deletions cfgmgr/buffer_pool_mellanox.lua
@@ -83,6 +83,24 @@ end

local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size')

-- Whether shared headroom pool is enabled?
local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio'))

-- Fetch the shared headroom pool size
local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff'))

local shp_enabled = false
if over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then
shp_enabled = true
end

if shp_size ~= nil and shp_size ~= 0 then
shp_enabled = true
else
shp_size = 0
end

-- Switch to APPL_DB
redis.call('SELECT', appl_db)

@@ -103,6 +121,7 @@ local statistics = {}

-- Fetch sizes of all of the profiles, accumulate them
local accumulative_occupied_buffer = 0
local accumulative_xoff = 0
for i = 1, #profiles, 1 do
if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
local size = tonumber(redis.call('HGET', profiles[i][1], 'size'))
@@ -114,6 +133,13 @@ for i = 1, #profiles, 1 do
profiles[i][2] = count_up_port
end
if size ~= 0 then
if shp_enabled and shp_size == 0 then
local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon'))
local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff'))
if xon ~= nil and xoff ~= nil and xon + xoff > size then
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2]
end
end
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2]
end
table.insert(statistics, {profiles[i][1], size, profiles[i][2]})
@@ -138,7 +164,7 @@ end
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))

-- Align mmu_size at cell size boundary, otherwith the sdk will complain and the syncd will faill
-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
local number_of_cells = math.floor(mmu_size / cell_size)
local ceiling_mmu_size = number_of_cells * cell_size

@@ -149,11 +175,16 @@ redis.call('SELECT', config_db)
local pools_need_update = {}
local ipools = redis.call('KEYS', 'BUFFER_POOL|ingress*')
local ingress_pool_count = 0
local ingress_lossless_pool_size = nil
for i = 1, #ipools, 1 do
local size = tonumber(redis.call('HGET', ipools[i], 'size'))
if not size then
table.insert(pools_need_update, ipools[i])
ingress_pool_count = ingress_pool_count + 1
else
if ipools[i] == 'BUFFER_POOL|ingress_lossless_pool' and shp_enabled and shp_size == 0 then
ingress_lossless_pool_size = size
end
end
end

@@ -165,7 +196,14 @@ for i = 1, #epools, 1 do
end
end

if shp_enabled and shp_size == 0 then
shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio)
end

local pool_size
if shp_size then
accumulative_occupied_buffer = accumulative_occupied_buffer + shp_size
end
if ingress_pool_count == 1 then
pool_size = mmu_size - accumulative_occupied_buffer
else
@@ -176,18 +214,31 @@ if pool_size > ceiling_mmu_size then
pool_size = ceiling_mmu_size
end

local shp_deployed = false
for i = 1, #pools_need_update, 1 do
local pool_name = string.match(pools_need_update[i], "BUFFER_POOL|([^%s]+)$")
table.insert(result, pool_name .. ":" .. math.ceil(pool_size))
if shp_size ~= 0 and pool_name == "ingress_lossless_pool" then
table.insert(result, pool_name .. ":" .. math.ceil(pool_size) .. ":" .. math.ceil(shp_size))
shp_deployed = true
else
table.insert(result, pool_name .. ":" .. math.ceil(pool_size))
end
end

if not shp_deployed and shp_size ~= 0 and ingress_lossless_pool_size ~= nil then
table.insert(result, "ingress_lossless_pool:" .. math.ceil(ingress_lossless_pool_size) .. ":" .. math.ceil(shp_size))
end

table.insert(result, "debug:mmu_size:" .. mmu_size)
table.insert(result, "debug:accumulative:" .. accumulative_occupied_buffer)
table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
for i = 1, #statistics do
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
end
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g)
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
table.insert(result, "debug:shp_size:" .. shp_size)
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)

return result
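
To summarize the arithmetic added above: a statically configured shared headroom pool size (the xoff field of BUFFER_POOL|ingress_lossless_pool) takes precedence; otherwise, when only the over-subscribe ratio is configured, the size is derived from the accumulated private xoff divided by that ratio. Either way the pool is carved out of the MMU before the ingress lossless pool size is computed. A condensed sketch, with variable names following the script and made-up numbers in the example:

-- Condensed sketch of the shared-headroom-pool arithmetic in buffer_pool_mellanox.lua.
-- static_xoff:          'xoff' configured on BUFFER_POOL|ingress_lossless_pool (or nil)
-- over_subscribe_ratio: value from DEFAULT_LOSSLESS_BUFFER_PARAMETER (or nil)
-- accumulative_xoff:    accumulated (xon + xoff - size) over the lossless PGs
local function ingress_lossless_entry(mmu_size, cell_size, occupied, static_xoff,
                                      over_subscribe_ratio, accumulative_xoff)
    local shp_size = static_xoff or 0
    if shp_size == 0 and over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then
        -- no static size configured: derive it from the over-subscribe ratio
        shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio)
    end
    local pool_size = mmu_size - (occupied + shp_size)
    -- keep the pool within the cell-aligned MMU size
    local ceiling_mmu_size = math.floor(mmu_size / cell_size) * cell_size
    if pool_size > ceiling_mmu_size then
        pool_size = ceiling_mmu_size
    end
    if shp_size ~= 0 then
        -- matches the "<pool>:<size>:<xoff>" entry format the script returns
        return "ingress_lossless_pool:" .. math.ceil(pool_size) .. ":" .. math.ceil(shp_size)
    end
    return "ingress_lossless_pool:" .. math.ceil(pool_size)
end

print(ingress_lossless_entry(13619200, 128, 4100096, nil, 2, 196608))
-- ingress_lossless_pool:9420800:98304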