Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dynamic buffer calc] Support dynamic buffer calculation #1338

Merged
merged 8 commits into from
Dec 15, 2020
12 changes: 11 additions & 1 deletion cfgmgr/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ LIBNL_LIBS = -lnl-genl-3 -lnl-route-3 -lnl-3

bin_PROGRAMS = vlanmgrd teammgrd portmgrd intfmgrd buffermgrd vrfmgrd nbrmgrd vxlanmgrd sflowmgrd natmgrd coppmgrd

# Directory the Lua scripts listed below are installed into.
cfgmgrdir = $(datadir)/swss

# Data files installed into $(cfgmgrdir); the dist_ prefix also includes them in the source distribution.
dist_cfgmgr_DATA = \
buffer_check_headroom_mellanox.lua \
buffer_headroom_mellanox.lua \
buffer_pool_mellanox.lua \
buffer_check_headroom_vs.lua \
buffer_headroom_vs.lua \
buffer_pool_vs.lua

if DEBUG
DBGFLAGS = -ggdb -DDEBUG
else
Expand All @@ -31,7 +41,7 @@ intfmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_LDADD = -lswsscommon

buffermgrd_SOURCES = buffermgrd.cpp buffermgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
buffermgrd_SOURCES = buffermgrd.cpp buffermgr.cpp buffermgrdyn.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
buffermgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_LDADD = -lswsscommon
Expand Down
131 changes: 131 additions & 0 deletions cfgmgr/buffer_check_headroom_mellanox.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
-- Checks whether the headroom accumulated on a port stays below the port's
-- max_headroom_size.
--
-- KEYS[1] - port name
-- ARGV[1] - profile name
-- ARGV[2] - new size
-- ARGV[3] - pg to add
--
-- Returns a list of strings: "result:true" or "result:false", optionally
-- followed by "debug:..." entries.

local port = KEYS[1]
local input_profile_name = ARGV[1]
local input_profile_size = ARGV[2]
local new_pg = ARGV[3]
local accumulative_size = 0

local appl_db = "0"
local config_db = "4"
local state_db = "6"

local ret_true = {}
local ret_false = {}
local ret = {}
local default_ret = {}

table.insert(ret_true, "result:true")
table.insert(ret_false, "result:false")

-- Fetch the cable length from CONFIG_DB
redis.call('SELECT', config_db)
local cable_length_keys = redis.call('KEYS', 'CABLE_LENGTH*')
if #cable_length_keys == 0 then
    return ret_true
end

-- Check whether cable length exceeds 300m (maximum value in the non-dynamic-buffer solution)
local cable_length_str = redis.call('HGET', cable_length_keys[1], port)
-- A nil reply from Redis is converted to the Lua boolean false (not nil),
-- so test for falsiness instead of comparing against nil.
if not cable_length_str then
    return ret_true
end
-- Drop the last character of the stored value before converting it.
local cable_length = tonumber(string.sub(cable_length_str, 1, -2))
if cable_length > 300 then
    default_ret = ret_false
else
    default_ret = ret_true
end
table.insert(default_ret, 'debug:no max_headroom_size configured, check cable length instead')

-- HGET returns a string; convert it so the numeric comparison below can match.
-- tonumber(false) yields nil when the field is absent.
local speed = tonumber(redis.call('HGET', 'PORT|' .. port, 'speed'))

-- Fetch the threshold from STATE_DB
redis.call('SELECT', state_db)

local max_headroom_size = tonumber(redis.call('HGET', 'BUFFER_MAX_PARAM_TABLE|' .. port, 'max_headroom_size'))
if max_headroom_size == nil then
    -- No limit configured for this port: fall back to the cable length verdict.
    return default_ret
end

local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
local pipeline_delay = tonumber(redis.call('HGET', asic_keys[1], 'pipeline_latency'))
if speed == 400000 then
    pipeline_delay = pipeline_delay * 2 - 1
end
accumulative_size = accumulative_size + 2 * pipeline_delay * 1024

-- Fetch all keys in BUFFER_PG according to the port
redis.call('SELECT', appl_db)

-- Debug messages collected while the script runs; they are appended to the
-- returned result list.
local debuginfo = {}

-- Returns the number of priority groups addressed by a PG key.
--
-- keyname: a string ending in "Ethernet<N>:<pg>" or "Ethernet<N>:<first>-<last>",
--          with or without a table prefix such as "BUFFER_PG_TABLE:".
-- Returns: <last> - <first> + 1 for a range, 1 for any other non-empty suffix,
--          and 0 (after recording a debug message) when the key does not match.
local function get_number_of_pgs(keyname)
    local range = string.match(keyname, "Ethernet%d+:([^%s]+)$")
    if range == nil then
        table.insert(debuginfo, "debug:invalid pg:" .. keyname)
        return 0
    end

    -- Parse both bounds as whole numbers so that multi-digit ids are counted
    -- correctly instead of looking only at the first and last character.
    local first, last = string.match(range, "^(%d+)%-(%d+)$")
    if first == nil then
        -- Not a "<first>-<last>" range: a single priority group.
        return 1
    end
    return 1 + tonumber(last) - tonumber(first)
end

-- Decide whether a valid PG was passed in; if so, turn it into the full
-- BUFFER_PG_TABLE key so it can be compared with the keys fetched below.
local no_input_pg = true
if new_pg ~= nil then
    if get_number_of_pgs(new_pg) ~= 0 then
        no_input_pg = false
        new_pg = 'BUFFER_PG_TABLE:' .. new_pg
    end
end

-- Fetch all the PGs, accumulate the sizes
-- Assume there is only one lossless profile configured among all PGs on each port
table.insert(debuginfo, 'debug:other overhead:' .. accumulative_size)
local pg_keys = redis.call('KEYS', 'BUFFER_PG_TABLE:' .. port .. ':*')
for i = 1, #pg_keys do
    local pg_key = pg_keys[i]
    -- HGET yields false (not nil) when the field is absent; guard before string.sub.
    local profile_ref = redis.call('HGET', pg_key, 'profile')
    if not profile_ref then
        table.insert(debuginfo, 'debug:' .. pg_key .. ':no profile, skipped')
    else
        -- Strip the first and last character of the stored reference.
        local profile = string.sub(profile_ref, 2, -2)
        -- Lossy PGs take no headroom; the input PG itself is accounted for after the loop.
        if profile ~= 'BUFFER_PROFILE_TABLE:ingress_lossy_profile' and (no_input_pg or new_pg ~= pg_key) then
            local current_profile_size
            if profile ~= input_profile_name and not no_input_pg then
                -- Another profile is referenced: use its stored size.
                current_profile_size = tonumber(redis.call('HGET', profile, 'size'))
            else
                current_profile_size = input_profile_size
                profile = input_profile_name
            end

            if current_profile_size == nil then
                table.insert(debuginfo, 'debug:' .. pg_key .. ':' .. tostring(profile) .. ':no size, skipped')
            else
                local pg_count = get_number_of_pgs(pg_key)
                accumulative_size = accumulative_size + current_profile_size * pg_count
                table.insert(debuginfo, 'debug:' .. pg_key .. ':' .. profile .. ':' .. current_profile_size .. ':' .. pg_count .. ':accu:' .. accumulative_size)
            end
        end
    end
end

-- Add the headroom of the PG being added, using the input profile size.
if not no_input_pg then
    local new_pg_count = get_number_of_pgs(new_pg)
    accumulative_size = accumulative_size + input_profile_size * new_pg_count
    table.insert(debuginfo, 'debug:' .. new_pg .. '*:' .. input_profile_name .. ':' .. input_profile_size .. ':' .. new_pg_count .. ':accu:' .. accumulative_size)
end

-- The check passes only when the accumulated size is strictly below the limit.
if max_headroom_size > accumulative_size then
    table.insert(ret, "result:true")
else
    table.insert(ret, "result:false")
end

table.insert(ret, "debug:max headroom:" .. max_headroom_size)
table.insert(ret, "debug:accumulative headroom:" .. accumulative_size)

for i = 1, #debuginfo do
    table.insert(ret, debuginfo[i])
end

return ret
1 change: 1 addition & 0 deletions cfgmgr/buffer_check_headroom_vs.lua
129 changes: 129 additions & 0 deletions cfgmgr/buffer_headroom_mellanox.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
-- Calculates the headroom (xon, xoff and total size) of a lossless buffer profile.
--
-- KEYS - profile name
-- ARGV[1] - port speed
-- ARGV[2] - cable length (the last character is stripped before conversion)
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay (optional, treated as 0 when absent or non-numeric)
--
-- Parameters retrieved from databases:
--     From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
--         small packet percentage: the parameter which is used to control worst case regarding the cell utilization
--         mtu: the mtu of lossless packet
--     From STATE_DB.ASIC_TABLE:
--         cell size: cell_size of the ASIC
--         pipeline_latency: the latency
--         mac_phy_delay:
--         peer_response_time:
--
-- Returns a list of strings: "xon:<n>", "xoff:<n>" and "size:<n>".
-- Raises a script error naming the parameter when a required one is missing.

local config_db = "4"
local state_db = "6"

-- Converts value to a number, raising an error that names the parameter
-- when the value is missing or not numeric.
local function required_number(value, name)
    local number = tonumber(value)
    if number == nil then
        error('missing or non-numeric ' .. name)
    end
    return number
end

-- Reads all fields of the first key matching pattern in the currently
-- selected database and returns them as a table indexed by field name.
local function read_first_hash(pattern)
    local keys = redis.call('KEYS', pattern)
    if keys[1] == nil then
        error('no key matching ' .. pattern)
    end
    local flat = redis.call('HGETALL', keys[1])
    local fields = {}
    for i = 1, #flat, 2 do
        fields[flat[i]] = flat[i + 1]
    end
    return fields
end

local port_speed = required_number(ARGV[1], 'port speed (ARGV[1])')
local cable_length = required_number(string.sub(ARGV[2] or '', 1, -2), 'cable length (ARGV[2])')
local port_mtu = required_number(ARGV[3], 'port mtu (ARGV[3])')
local gearbox_delay = tonumber(ARGV[4])

local ret = {}

if gearbox_delay == nil then
    gearbox_delay = 0
end

-- Fetch ASIC info from ASIC table in STATE_DB (only one key should exist)
redis.call('SELECT', state_db)
local asic = read_first_hash('ASIC_TABLE*')
local cell_size = required_number(asic['cell_size'], 'ASIC_TABLE cell_size')
local mac_phy_delay = required_number(asic['mac_phy_delay'], 'ASIC_TABLE mac_phy_delay') * 1024
local peer_response_time = required_number(asic['peer_response_time'], 'ASIC_TABLE peer_response_time') * 1024

-- Fetch lossless traffic info from CONFIG_DB (only one key should exist)
redis.call('SELECT', config_db)
local traffic = read_first_hash('LOSSLESS_TRAFFIC_PATTERN*')
local lossless_mtu = required_number(traffic['mtu'], 'LOSSLESS_TRAFFIC_PATTERN mtu')
local small_packet_percentage = required_number(traffic['small_packet_percentage'], 'LOSSLESS_TRAFFIC_PATTERN small_packet_percentage')

-- Calculate the headroom information
local speed_of_light = 198000000
local minimal_packet_size = 64

-- Adjustment for 400G: a fixed pipeline latency replaces the ASIC_TABLE value
-- and one port mtu is added as overhead.
local pipeline_latency
local speed_overhead
if port_speed == 400000 then
    pipeline_latency = 37 * 1024
    speed_overhead = port_mtu
else
    pipeline_latency = required_number(asic['pipeline_latency'], 'ASIC_TABLE pipeline_latency') * 1024
    speed_overhead = 0
end

local worst_case_factor
if cell_size > 2 * minimal_packet_size then
    worst_case_factor = cell_size / minimal_packet_size
else
    worst_case_factor = (2 * cell_size) / (1 + cell_size)
end

local cell_occupancy = (100 - small_packet_percentage + small_packet_percentage * worst_case_factor) / 100

local bytes_on_gearbox
if gearbox_delay == 0 then
    bytes_on_gearbox = 0
else
    bytes_on_gearbox = port_speed * gearbox_delay / (8 * 1024)
end

local bytes_on_cable = 2 * cable_length * port_speed * 1000000000 / speed_of_light / (8 * 1024)
local propagation_delay = port_mtu + bytes_on_cable + 2 * bytes_on_gearbox + mac_phy_delay + peer_response_time

-- Calculate the xoff and xon and then round up at 1024 bytes
local xoff_value = lossless_mtu + propagation_delay * cell_occupancy
xoff_value = math.ceil(xoff_value / 1024) * 1024
local xon_value = pipeline_latency
xon_value = math.ceil(xon_value / 1024) * 1024

local headroom_size = xoff_value + xon_value + speed_overhead
headroom_size = math.ceil(headroom_size / 1024) * 1024

table.insert(ret, "xon" .. ":" .. math.ceil(xon_value))
table.insert(ret, "xoff" .. ":" .. math.ceil(xoff_value))
table.insert(ret, "size" .. ":" .. math.ceil(headroom_size))

return ret
1 change: 1 addition & 0 deletions cfgmgr/buffer_headroom_vs.lua
Loading