Skip to content

Commit

Permalink
[QoS] Support dynamic headroom calculation for Barefoot platforms (#2412
Browse files Browse the repository at this point in the history
)

Signed-off-by: Mariusz Stachura <mariusz.stachura@intel.com>

What I did
Adding the dynamic headroom calculation support for Barefoot platforms.

Why I did it
Enabling dynamic mode for barefoot case.

How I verified it
The community tests are adjusted and pass.
  • Loading branch information
MariuszStachura authored Oct 18, 2022
1 parent 15beee4 commit ab29920
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 2 deletions.
5 changes: 4 additions & 1 deletion cfgmgr/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ dist_cfgmgr_DATA = \
buffer_pool_mellanox.lua \
buffer_check_headroom_vs.lua \
buffer_headroom_vs.lua \
buffer_pool_vs.lua
buffer_pool_vs.lua \
buffer_check_headroom_barefoot.lua \
buffer_headroom_barefoot.lua \
buffer_pool_barefoot.lua

if DEBUG
DBGFLAGS = -ggdb -DDEBUG
Expand Down
6 changes: 6 additions & 0 deletions cfgmgr/buffer_check_headroom_barefoot.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
local ret = {}

table.insert(ret, "result:true")
table.insert(ret, "debug:No need to check port headroom limit as shared headroom pool model is supported.")

return ret
147 changes: 147 additions & 0 deletions cfgmgr/buffer_headroom_barefoot.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
-- KEYS - profile name
-- ARGV[1] - port speed
-- ARGV[2] - cable length
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay

-- Parameters retried from databases:
-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
-- small packet percentage: the parameter which is used to control worst case regarding the cell utilization
-- mtu: the mtu of lossless packet
-- From STATE_DB.ASIC_TABLE:
-- cell size: cell_size of the ASIC
-- pipeline_latency: the latency (XON)
-- mac_phy_delay: the bytes held in the switch chip's egress pipeline and PHY when XOFF has been generated
-- peer_response_time: the bytes that are held in the peer switch's pipeline and will be send out when the XOFF packet is received

local lossless_mtu
local small_packet_percentage
local cell_size
local pipeline_latency
local mac_phy_delay
local peer_response_time

local port_speed = tonumber(ARGV[1])
local cable_length = tonumber(string.sub(ARGV[2], 1, -2))
local port_mtu = tonumber(ARGV[3])
local gearbox_delay = tonumber(ARGV[4])

local config_db = "4"
local state_db = "6"

local ret = {}

-- Pause quanta should be taken for each operating speed is defined in IEEE 802.3 31B.3.7.
-- The key of table pause_quanta_per_speed is operating speed at Mb/s.
-- The value of table pause_quanta_per_speed is the number of pause_quanta.
local pause_quanta_per_speed = {}
pause_quanta_per_speed[400000] = 905
pause_quanta_per_speed[200000] = 453
pause_quanta_per_speed[100000] = 394
pause_quanta_per_speed[50000] = 147
pause_quanta_per_speed[40000] = 118
pause_quanta_per_speed[25000] = 80
pause_quanta_per_speed[10000] = 67
pause_quanta_per_speed[1000] = 2
pause_quanta_per_speed[100] = 1

-- Get pause_quanta from the pause_quanta_per_speed table
local pause_quanta = pause_quanta_per_speed[port_speed]

if gearbox_delay == nil then
gearbox_delay = 0
end

-- Fetch ASIC info from ASIC table in STATE_DB
redis.call("SELECT", state_db)
local asic_keys = redis.call("KEYS", "ASIC_TABLE*")

-- Only one key should exist
local asic_table_content = redis.call("HGETALL", asic_keys[1])

for i = 1, #asic_table_content, 2 do
if asic_table_content[i] == "cell_size" then
cell_size = tonumber(asic_table_content[i+1])
end
if asic_table_content[i] == "pipeline_latency" then
pipeline_latency = tonumber(asic_table_content[i+1]) * 1024
end
if asic_table_content[i] == "mac_phy_delay" then
mac_phy_delay = tonumber(asic_table_content[i+1]) * 1024
end
-- If failed to get pause_quanta from the table, then use the default peer_response_time stored in state_db
if asic_table_content[i] == "peer_response_time" and pause_quanta == nil then
peer_response_time = tonumber(asic_table_content[i+1]) * 1024
end
end

-- Fetch lossless traffic info from CONFIG_DB
redis.call("SELECT", config_db)
local lossless_traffic_keys = redis.call("KEYS", "LOSSLESS_TRAFFIC_PATTERN*")

-- Only one key should exist
local lossless_traffic_table_content = redis.call("HGETALL", lossless_traffic_keys[1])
for i = 1, #lossless_traffic_table_content, 2 do
if lossless_traffic_table_content[i] == "mtu" then
lossless_mtu = tonumber(lossless_traffic_table_content[i+1])
end
if lossless_traffic_table_content[i] == "small_packet_percentage" then
small_packet_percentage = tonumber(lossless_traffic_table_content[i+1])
end
end

-- Fetch the shared headroom pool size
local shp_size = tonumber(redis.call("HGET", "BUFFER_POOL|ingress_lossless_pool", "xoff"))

-- Calculate the headroom information
local speed_of_light = 198000000
local minimal_packet_size = 64
local cell_occupancy
local worst_case_factor
local propagation_delay
local bytes_on_cable
local bytes_on_gearbox
local xoff_value
local xon_value
local headroom_size

if cell_size > 2 * minimal_packet_size then
worst_case_factor = cell_size / minimal_packet_size
else
worst_case_factor = (2 * cell_size) / (1 + cell_size)
end

cell_occupancy = (100 - small_packet_percentage + small_packet_percentage * worst_case_factor) / 100

if (gearbox_delay == 0) then
bytes_on_gearbox = 0
else
bytes_on_gearbox = port_speed * gearbox_delay / (8 * 1024)
end

-- If successfully get pause_quanta from the table, then calculate peer_response_time from it
if pause_quanta ~= nil then
peer_response_time = (pause_quanta) * 512 / 8
end

if port_speed == 400000 then
peer_response_time = 2 * peer_response_time
end

bytes_on_cable = 2 * cable_length * port_speed * 1000000000 / speed_of_light / (8 * 1024)
propagation_delay = port_mtu + bytes_on_cable + 2 * bytes_on_gearbox + mac_phy_delay + peer_response_time

-- Calculate the xoff and xon and then round up at 1024 bytes
xoff_value = lossless_mtu + propagation_delay * cell_occupancy
xoff_value = math.ceil(xoff_value / 1024) * 1024
xon_value = pipeline_latency
xon_value = math.ceil(xon_value / 1024) * 1024

headroom_size = xon_value
headroom_size = math.ceil(headroom_size / 1024) * 1024

table.insert(ret, "xon" .. ":" .. math.ceil(xon_value))
table.insert(ret, "xoff" .. ":" .. math.ceil(xoff_value))
table.insert(ret, "size" .. ":" .. math.ceil(headroom_size))

return ret
30 changes: 30 additions & 0 deletions cfgmgr/buffer_pool_barefoot.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- KEYS - None
-- ARGV - None

local result = {}
local config_db = "4"
local state_db = "6"

redis.call("SELECT", state_db)
local asic_keys = redis.call("KEYS", "ASIC_TABLE*")
local cell_size = tonumber(redis.call("HGET", asic_keys[1], "cell_size"))

-- Based on cell_size, calculate singular headroom
local ppg_headroom = 400 * cell_size

redis.call("SELECT", config_db)
local ports = redis.call("KEYS", "PORT|*")
local ports_num = #ports

-- 2 PPGs per port, 70% of possible maximum value.
local shp_size = math.ceil(ports_num * 2 * ppg_headroom * 0.7)

local ingress_lossless_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'size'))
local ingress_lossy_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossy_pool', 'size'))
local egress_lossy_pool_size_fixed = tonumber(redis.call('HGET', 'BUFFER_POOL|egress_lossy_pool', 'size'))

table.insert(result, "ingress_lossless_pool" .. ":" .. ingress_lossless_pool_size_fixed .. ":" .. shp_size)
table.insert(result, "ingress_lossy_pool" .. ":" .. ingress_lossy_pool_size_fixed)
table.insert(result, "egress_lossy_pool" .. ":" .. egress_lossy_pool_size_fixed)

return result
2 changes: 1 addition & 1 deletion cfgmgr/buffermgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ task_process_status BufferMgr::doSpeedUpdateTask(string port)
// Although we have up to 8 PGs for now, the range to check is expanded to 32 support more PGs
set<string> lossless_pg_combinations = generateIdListFromMap(lossless_pg_id, sizeof(lossless_pg_id));

if (m_portStatusLookup[port] == "down" && m_platform == "mellanox")
if (m_portStatusLookup[port] == "down" && (m_platform == "mellanox" || m_platform == "barefoot"))
{
for (auto lossless_pg : lossless_pg_combinations)
{
Expand Down

0 comments on commit ab29920

Please sign in to comment.