From 5fa2329b895836e9081f2db3aad4219935f79d77 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Mon, 8 Feb 2021 05:19:49 +0800 Subject: [PATCH] Support shared headroom pool on top of dynamic buffer calculation (#1581) * Support shared headroom pool on top of dynamic buffer calculation - Feature is enabled/disabled on-the-fly by configuring over-subscribe-ratio and shared headroom pool size. If both are configured, the shared headroom pool size will take effect. When turning the feature on/off, all the lossless profiles and buffer pool size will be recalculated. - Support calculating shared headroom pool while ingress lossless pool is statically configured. - Check accumulative headroom before toggling SHP state Disabling SHP results in the size of each PG increasing, hence we need to check whether the accumulative headroom exceeds the limit - Split the function doUpdateStaticProfileTask into two functions Originally it was called for static profile only and consisted of two parts: - One is for dynamic th updated. It will go over all the buffer profiles dynamically generated according to the dynamic th and update them - The other is for size updated. It will go over each port referencing the profile and check whether the accumulative headroom exceeds limit Now that it is also called by shared headroom pool, we split it into two functions to make it more clear Signed-off-by: Stephen Sun How I verified it Run vs test and regression test. 
--- cfgmgr/buffer_headroom_mellanox.lua | 22 +- cfgmgr/buffer_pool_mellanox.lua | 57 ++++- cfgmgr/buffermgrdyn.cpp | 309 +++++++++++++++++++++++++--- cfgmgr/buffermgrdyn.h | 22 +- tests/test_buffer_dynamic.py | 92 +++++++++ 5 files changed, 465 insertions(+), 37 deletions(-) diff --git a/cfgmgr/buffer_headroom_mellanox.lua b/cfgmgr/buffer_headroom_mellanox.lua index cbc6d2d6ec8f..2c9581400816 100644 --- a/cfgmgr/buffer_headroom_mellanox.lua +++ b/cfgmgr/buffer_headroom_mellanox.lua @@ -16,6 +16,7 @@ local lossless_mtu local small_packet_percentage +local over_subscribe_ratio = 0 local cell_size local pipeline_latency local mac_phy_delay @@ -72,8 +73,19 @@ for i = 1, #lossless_traffic_table_content, 2 do end end --- Fetch DEFAULT_LOSSLESS_BUFFER_PARAMETER from CONFIG_DB -local lossless_traffic_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +-- Fetch over subscribe ratio +local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio')) + +-- Fetch the shared headroom pool size +local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff')) + +local shp_enabled +if shp_size ~= nil and shp_size ~= 0 or over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then + shp_enabled = true +else + shp_enabled = false +end -- Calculate the headroom information local speed_of_light = 198000000 @@ -119,7 +131,11 @@ xoff_value = math.ceil(xoff_value / 1024) * 1024 xon_value = pipeline_latency xon_value = math.ceil(xon_value / 1024) * 1024 -headroom_size = xoff_value + xon_value + speed_overhead +if shp_enabled then + headroom_size = xon_value +else + headroom_size = xoff_value + xon_value + speed_overhead +end headroom_size = math.ceil(headroom_size / 1024) * 1024 table.insert(ret, "xon" .. ":" .. 
math.ceil(xon_value)) diff --git a/cfgmgr/buffer_pool_mellanox.lua b/cfgmgr/buffer_pool_mellanox.lua index da88f186c0a1..9adbb15a6a16 100644 --- a/cfgmgr/buffer_pool_mellanox.lua +++ b/cfgmgr/buffer_pool_mellanox.lua @@ -83,6 +83,24 @@ end local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size') +-- Whether shared headroom pool is enabled? +local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio')) + +-- Fetch the shared headroom pool size +local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff')) + +local shp_enabled = false +if over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then + shp_enabled = true +end + +if shp_size ~= nil and shp_size ~= 0 then + shp_enabled = true +else + shp_size = 0 +end + -- Switch to APPL_DB redis.call('SELECT', appl_db) @@ -103,6 +121,7 @@ local statistics = {} -- Fetch sizes of all of the profiles, accumulate them local accumulative_occupied_buffer = 0 +local accumulative_xoff = 0 for i = 1, #profiles, 1 do if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then local size = tonumber(redis.call('HGET', profiles[i][1], 'size')) @@ -114,6 +133,13 @@ for i = 1, #profiles, 1 do profiles[i][2] = count_up_port end if size ~= 0 then + if shp_enabled and shp_size == 0 then + local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon')) + local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff')) + if xon ~= nil and xoff ~= nil and xon + xoff > size then + accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2] + end + end accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2] end table.insert(statistics, {profiles[i][1], size, profiles[i][2]}) @@ -138,7 +164,7 @@ end local asic_keys = redis.call('KEYS', 
'ASIC_TABLE*') local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size')) --- Align mmu_size at cell size boundary, otherwith the sdk will complain and the syncd will faill +-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail local number_of_cells = math.floor(mmu_size / cell_size) local ceiling_mmu_size = number_of_cells * cell_size @@ -149,11 +175,16 @@ redis.call('SELECT', config_db) local pools_need_update = {} local ipools = redis.call('KEYS', 'BUFFER_POOL|ingress*') local ingress_pool_count = 0 +local ingress_lossless_pool_size = nil for i = 1, #ipools, 1 do local size = tonumber(redis.call('HGET', ipools[i], 'size')) if not size then table.insert(pools_need_update, ipools[i]) ingress_pool_count = ingress_pool_count + 1 + else + if ipools[i] == 'BUFFER_POOL|ingress_lossless_pool' and shp_enabled and shp_size == 0 then + ingress_lossless_pool_size = size + end end end @@ -165,7 +196,14 @@ for i = 1, #epools, 1 do end end +if shp_enabled and shp_size == 0 then + shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio) +end + local pool_size +if shp_size then + accumulative_occupied_buffer = accumulative_occupied_buffer + shp_size +end if ingress_pool_count == 1 then pool_size = mmu_size - accumulative_occupied_buffer else @@ -176,18 +214,31 @@ if pool_size > ceiling_mmu_size then pool_size = ceiling_mmu_size end +local shp_deployed = false for i = 1, #pools_need_update, 1 do local pool_name = string.match(pools_need_update[i], "BUFFER_POOL|([^%s]+)$") - table.insert(result, pool_name .. ":" .. math.ceil(pool_size)) + if shp_size ~= 0 and pool_name == "ingress_lossless_pool" then + table.insert(result, pool_name .. ":" .. math.ceil(pool_size) .. ":" .. math.ceil(shp_size)) + shp_deployed = true + else + table.insert(result, pool_name .. ":" .. 
math.ceil(pool_size)) + end +end + +if not shp_deployed and shp_size ~= 0 and ingress_lossless_pool_size ~= nil then + table.insert(result, "ingress_lossless_pool:" .. math.ceil(ingress_lossless_pool_size) .. ":" .. math.ceil(shp_size)) end table.insert(result, "debug:mmu_size:" .. mmu_size) -table.insert(result, "debug:accumulative:" .. accumulative_occupied_buffer) +table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer) for i = 1, #statistics do table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3]) end table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g) table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size) table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead) +table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled)) +table.insert(result, "debug:shp_size:" .. shp_size) +table.insert(result, "debug:accumulative xoff:" .. 
accumulative_xoff) return result diff --git a/cfgmgr/buffermgrdyn.cpp b/cfgmgr/buffermgrdyn.cpp index 9039aea734bd..bff7acfd7d83 100644 --- a/cfgmgr/buffermgrdyn.cpp +++ b/cfgmgr/buffermgrdyn.cpp @@ -274,16 +274,16 @@ string BufferMgrDynamic::getPgPoolMode() } // Meta flows which are called by main flows -void BufferMgrDynamic::calculateHeadroomSize(const string &speed, const string &cable, const string &port_mtu, const string &gearbox_model, buffer_profile_t &headroom) +void BufferMgrDynamic::calculateHeadroomSize(buffer_profile_t &headroom) { // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size and threshold vector keys = {}; vector argv = {}; keys.emplace_back(headroom.name); - argv.emplace_back(speed); - argv.emplace_back(cable); - argv.emplace_back(port_mtu); + argv.emplace_back(headroom.speed); + argv.emplace_back(headroom.cable_length); + argv.emplace_back(headroom.port_mtu); argv.emplace_back(m_identifyGearboxDelay); try @@ -316,6 +316,14 @@ void BufferMgrDynamic::calculateHeadroomSize(const string &speed, const string & } } +// This function is designed to fetch the sizes of shared buffer pool and shared headroom pool +// and programe them to APPL_DB if they differ from the current value. +// The function is called periodically: +// 1. Fetch the sizes by calling lug plugin +// - For each of the pools, it checks the size of shared buffer pool. +// - For ingress_lossless_pool, it checks the size of the shared headroom pool (field xoff of the pool) as well. +// 2. Compare the fetched value and the previous value +// 3. 
Program to APPL_DB.BUFFER_POOL_TABLE only if its sizes differ from the stored value void BufferMgrDynamic::recalculateSharedBufferPool() { try @@ -326,8 +334,17 @@ void BufferMgrDynamic::recalculateSharedBufferPool() auto ret = runRedisScript(*m_applDb, m_bufferpoolSha, keys, argv); // The format of the result: - // a list of strings containing key, value pairs with colon as separator + // a list of lines containing key, value pairs with colon as separator // each is the size of a buffer pool + // possible format of each line: + // 1. shared buffer pool only: + // : + // eg: "egress_lossless_pool:12800000" + // 2. shared buffer pool and shared headroom pool, for ingress_lossless_pool only: + // ingress_lossless_pool::, + // eg: "ingress_lossless_pool:3200000:1024000" + // 3. debug information: + // debug: for ( auto i : ret) { @@ -336,12 +353,62 @@ void BufferMgrDynamic::recalculateSharedBufferPool() if ("debug" != poolName) { - auto &pool = m_bufferPoolLookup[pairs[0]]; + // We will handle the sizes of buffer pool update here. + // For the ingress_lossless_pool, there are some dedicated steps for shared headroom pool + // - The sizes of both the shared headroom pool and the shared buffer pool should be taken into consideration + // - In case the shared headroom pool size is statically configured, as it is programmed to APPL_DB during buffer pool handling, + // - any change from lua plugin will be ignored. + // - will handle ingress_lossless_pool in the way all other pools are handled in this case + auto &pool = m_bufferPoolLookup[poolName]; + auto &poolSizeStr = pairs[1]; + auto old_xoff = pool.xoff; + bool xoff_updated = false; + + if (poolName == INGRESS_LOSSLESS_PG_POOL_NAME && !isNonZero(m_configuredSharedHeadroomPoolSize)) + { + // Shared headroom pool size is treated as "updated" if either of the following conditions satisfied: + // - It is legal and differs from the stored value. 
+ // - The lua plugin doesn't return the shared headroom pool size but there is a non-zero value stored + // This indicates the shared headroom pool was enabled by over subscribe ratio and is disabled. + // In this case a "0" will programmed to APPL_DB, indicating the shared headroom pool is disabled. + SWSS_LOG_DEBUG("Buffer pool ingress_lossless_pool xoff: %s, size %s", pool.xoff.c_str(), pool.total_size.c_str()); + + if (pairs.size() > 2) + { + auto &xoffStr = pairs[2]; + if (pool.xoff != xoffStr) + { + unsigned long xoffNum = atol(xoffStr.c_str()); + if (m_mmuSizeNumber > 0 && m_mmuSizeNumber < xoffNum) + { + SWSS_LOG_ERROR("Buffer pool %s: Invalid xoff %s, exceeding the mmu size %s, ignored xoff but the pool size will be updated", + poolName.c_str(), xoffStr.c_str(), m_mmuSize.c_str()); + } + else + { + pool.xoff = xoffStr; + xoff_updated = true; + } + } + } + else + { + if (isNonZero(pool.xoff)) + { + xoff_updated = true; + } + pool.xoff = "0"; + } + } - if (pool.total_size == pairs[1]) + // In general, the APPL_DB should be updated in case any of the following conditions satisfied + // 1. Shared headroom pool size has been updated + // This indicates the shared headroom pool is enabled by configuring over subscribe ratio, + // which means the shared headroom pool size has updated by lua plugin + // 2. 
The size of the shared buffer pool isn't configured and has been updated by lua plugin + if ((pool.total_size == poolSizeStr || !pool.dynamic_size) && !xoff_updated) continue; - auto &poolSizeStr = pairs[1]; unsigned long poolSizeNum = atol(poolSizeStr.c_str()); if (m_mmuSizeNumber > 0 && m_mmuSizeNumber < poolSizeNum) { @@ -350,10 +417,19 @@ void BufferMgrDynamic::recalculateSharedBufferPool() continue; } + auto old_size = pool.total_size; pool.total_size = poolSizeStr; updateBufferPoolToDb(poolName, pool); - SWSS_LOG_NOTICE("Buffer pool %s had been updated with new size [%s]", poolName.c_str(), pool.total_size.c_str()); + if (!pool.xoff.empty()) + { + SWSS_LOG_NOTICE("Buffer pool %s has been updated: size from [%s] to [%s], xoff from [%s] to [%s]", + poolName.c_str(), old_size.c_str(), pool.total_size.c_str(), old_xoff.c_str(), pool.xoff.c_str()); + } + else + { + SWSS_LOG_NOTICE("Buffer pool %s has been updated: size from [%s] to [%s]", poolName.c_str(), old_size.c_str(), pool.total_size.c_str()); + } } else { @@ -409,15 +485,16 @@ void BufferMgrDynamic::updateBufferPoolToDb(const string &name, const buffer_poo vector fvVector; if (pool.ingress) - fvVector.emplace_back(make_pair("type", "ingress")); + fvVector.emplace_back("type", "ingress"); else - fvVector.emplace_back(make_pair("type", "egress")); + fvVector.emplace_back("type", "egress"); - fvVector.emplace_back(make_pair("mode", pool.mode)); + if (!pool.xoff.empty()) + fvVector.emplace_back("xoff", pool.xoff); - SWSS_LOG_INFO("Buffer pool %s is initialized", name.c_str()); + fvVector.emplace_back("mode", pool.mode); - fvVector.emplace_back(make_pair("size", pool.total_size)); + fvVector.emplace_back("size", pool.total_size); m_applBufferPoolTable.set(name, fvVector); @@ -476,7 +553,7 @@ void BufferMgrDynamic::updateBufferPgToDb(const string &key, const string &profi } // We have to check the headroom ahead of applying them -task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const 
string &cable, const string &mtu, const string &threshold, const string &gearbox_model, string &profile_name) +task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, string &profile_name) { // Create record in BUFFER_PROFILE table @@ -496,12 +573,16 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const return task_process_status::task_need_retry; } + profile.speed = speed; + profile.cable_length = cable_len; + profile.port_mtu = mtu; + profile.gearbox_model = gearbox_model; + // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size // Pay attention, the threshold can contain valid value - calculateHeadroomSize(speed, cable, mtu, gearbox_model, profile); + calculateHeadroomSize(profile); profile.threshold = threshold; - profile.dynamic_calculated = true; profile.static_configured = false; profile.lossless = true; profile.name = profile_name; @@ -512,7 +593,7 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const SWSS_LOG_NOTICE("BUFFER_PROFILE %s has been created successfully", profile_name.c_str()); SWSS_LOG_DEBUG("New profile created %s according to (%s %s %s): xon %s xoff %s size %s", profile_name.c_str(), - speed.c_str(), cable.c_str(), gearbox_model.c_str(), + speed.c_str(), cable_len.c_str(), gearbox_model.c_str(), profile.xon.c_str(), profile.xoff.c_str(), profile.size.c_str()); } else @@ -768,6 +849,106 @@ task_process_status BufferMgrDynamic::refreshPriorityGroupsForPort(const string return task_process_status::task_success; } +void BufferMgrDynamic::refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size) +{ + // The lossless profiles need to be refreshed only if system is switched between SHP and non-SHP + bool need_refresh_profiles = false; + bool shp_enabled_by_size = isNonZero(m_configuredSharedHeadroomPoolSize); + 
bool shp_enabled_by_ratio = isNonZero(m_overSubscribeRatio); + + /* + * STATE EVENT ACTION + * enabled by size -> config ratio: no action + * enabled by size -> remove ratio: no action + * enabled by size -> remove size: shared headroom pool disabled + * SHP size will be set here (zero) and programmed to APPL_DB in handleBufferPoolTable + * enabled by ratio -> config size: SHP size will be set here (non zero) and programmed to APPL_DB in handleBufferPoolTable + * enabled by ratio -> remove size: SHP size will be set and programmed to APPL_DB during buffer pool size update + * enabled by ratio -> remove ratio: shared headroom pool disabled + * dynamic size: SHP size will be handled and programmed to APPL_DB during buffer pool size update + * static size: SHP size will be set (zero) and programmed to APPL_DB here + * disabled -> config ratio: shared headroom pool enabled. all lossless profiles refreshed + * SHP size will be handled during buffer pool size update + * disabled -> config size: shared headroom pool enabled. 
all lossless profiles refreshed + * SHP size will be handled here and programmed to APPL_DB during buffer pool table handling + */ + + auto &ingressLosslessPool = m_bufferPoolLookup[INGRESS_LOSSLESS_PG_POOL_NAME]; + if (enable_state_updated_by_ratio) + { + if (shp_enabled_by_size) + { + // enabled by size -> config or remove ratio, no action + SWSS_LOG_INFO("SHP: No need to refresh lossless profiles even if enable state updated by over subscribe ratio, already enabled by SHP size"); + } + else + { + // enabled by ratio -> remove ratio + // disabled -> config ratio + need_refresh_profiles = true; + } + } + + if (enable_state_updated_by_size) + { + if (shp_enabled_by_ratio) + { + // enabled by ratio -> config size, size will be updated (later in this function) + // enabled by ratio -> remove size, no action here, will be handled during buffer pool size update + SWSS_LOG_INFO("SHP: No need to refresh lossless profiles even if enable state updated by SHP size, already enabled by over subscribe ratio"); + } + else + { + // disabled -> config size + // enabled by size -> remove size + need_refresh_profiles = true; + } + } + + if (need_refresh_profiles) + { + SWSS_LOG_NOTICE("Updating dynamic buffer profiles due to shared headroom pool state updated"); + + for (auto it = m_bufferProfileLookup.begin(); it != m_bufferProfileLookup.end(); ++it) + { + auto &name = it->first; + auto &profile = it->second; + if (profile.static_configured) + { + SWSS_LOG_INFO("Non dynamic profile %s skipped", name.c_str()); + continue; + } + SWSS_LOG_INFO("Updating profile %s with speed %s cable length %s mtu %s gearbox model %s", + name.c_str(), + profile.speed.c_str(), profile.cable_length.c_str(), profile.port_mtu.c_str(), profile.gearbox_model.c_str()); + // recalculate the headroom size + calculateHeadroomSize(profile); + if (task_process_status::task_success != doUpdateBufferProfileForSize(profile, false)) + { + SWSS_LOG_ERROR("Failed to update buffer profile %s when toggle shared headroom 
pool. See previous message for detail. Please adjust the configuration manually", name.c_str()); + } + } + SWSS_LOG_NOTICE("Updating dynamic buffer profiles finished"); + } + + if (shp_enabled_by_size) + { + ingressLosslessPool.xoff = m_configuredSharedHeadroomPoolSize; + if (isNonZero(ingressLosslessPool.total_size)) + updateBufferPoolToDb(INGRESS_LOSSLESS_PG_POOL_NAME, ingressLosslessPool); + } + else if (!shp_enabled_by_ratio && enable_state_updated_by_ratio) + { + // shared headroom pool is enabled by ratio and will be disabled + // need to program APPL_DB because nobody else will take care of it + ingressLosslessPool.xoff = "0"; + if (isNonZero(ingressLosslessPool.total_size)) + updateBufferPoolToDb(INGRESS_LOSSLESS_PG_POOL_NAME, ingressLosslessPool); + } + + checkSharedBufferPoolSize(); +} + // Main flows // Update lossless pg on a port after an PG has been installed on the port @@ -842,13 +1023,13 @@ task_process_status BufferMgrDynamic::doRemovePgTask(const string &pg_key, const return task_process_status::task_success; } -task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t &profile) +task_process_status BufferMgrDynamic::doUpdateBufferProfileForDynamicTh(buffer_profile_t &profile) { const string &profileName = profile.name; auto &profileToMap = profile.port_pgs; set portsChecked; - if (profile.dynamic_calculated) + if (profile.static_configured && profile.dynamic_calculated) { for (auto &key : profileToMap) { @@ -870,7 +1051,19 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t } } } - else + + checkSharedBufferPoolSize(); + + return task_process_status::task_success; +} + +task_process_status BufferMgrDynamic::doUpdateBufferProfileForSize(buffer_profile_t &profile, bool update_pool_size=true) +{ + const string &profileName = profile.name; + auto &profileToMap = profile.port_pgs; + set portsChecked; + + if (!profile.static_configured || !profile.dynamic_calculated) { for (auto &key : 
profileToMap) { @@ -883,7 +1076,6 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t if (!isHeadroomResourceValid(port, profile)) { - // to do: get the value from application database SWSS_LOG_ERROR("BUFFER_PROFILE %s cannot be updated because %s referencing it violates the resource limitation", profileName.c_str(), key.c_str()); return task_process_status::task_failed; @@ -895,7 +1087,8 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t updateBufferProfileToDb(profileName, profile); } - checkSharedBufferPoolSize(); + if (update_pool_size) + checkSharedBufferPoolSize(); return task_process_status::task_success; } @@ -931,6 +1124,7 @@ task_process_status BufferMgrDynamic::handleBufferMaxParam(KeyOpFieldsValuesTupl task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFieldsValuesTuple &tuple) { string op = kfvOp(tuple); + string newRatio = "0"; if (op == SET_COMMAND) { @@ -939,10 +1133,30 @@ task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFiel if (fvField(i) == "default_dynamic_th") { m_defaultThreshold = fvValue(i); - SWSS_LOG_DEBUG("Handling Buffer Maximum value table field default_dynamic_th value %s", m_defaultThreshold.c_str()); + SWSS_LOG_DEBUG("Handling Buffer parameter table field default_dynamic_th value %s", m_defaultThreshold.c_str()); + } + else if (fvField(i) == "over_subscribe_ratio") + { + newRatio = fvValue(i); + SWSS_LOG_DEBUG("Handling Buffer parameter table field over_subscribe_ratio value %s", fvValue(i).c_str()); } } } + else + { + SWSS_LOG_ERROR("Unsupported command %s received for DEFAULT_LOSSLESS_BUFFER_PARAMETER table", op.c_str()); + return task_process_status::task_failed; + } + + if (newRatio != m_overSubscribeRatio) + { + bool isSHPEnabled = isNonZero(m_overSubscribeRatio); + bool willSHPBeEnabled = isNonZero(newRatio); + SWSS_LOG_INFO("Recalculate shared buffer pool size due to over subscribe ratio has been updated from %s 
to %s", + m_overSubscribeRatio.c_str(), newRatio.c_str()); + m_overSubscribeRatio = newRatio; + refreshSharedHeadroomPool(isSHPEnabled != willSHPBeEnabled, false); + } return task_process_status::task_success; } @@ -1149,6 +1363,7 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup // 1. Create the corresponding table entries in APPL_DB // 2. Record the table in the internal cache m_bufferPoolLookup buffer_pool_t &bufferPool = m_bufferPoolLookup[pool]; + string newSHPSize = "0"; bufferPool.dynamic_size = true; for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) @@ -1163,7 +1378,7 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup } if (field == buffer_pool_xoff_field_name) { - bufferPool.xoff = value; + newSHPSize = value; } if (field == buffer_pool_mode_field_name) { @@ -1173,10 +1388,46 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup { bufferPool.ingress = (value == buffer_value_ingress); } - fvVector.emplace_back(FieldValueTuple(field, value)); + fvVector.emplace_back(field, value); SWSS_LOG_INFO("Inserting BUFFER_POOL table field %s value %s", field.c_str(), value.c_str()); } - if (!bufferPool.dynamic_size) + + bool dontUpdatePoolToDb = bufferPool.dynamic_size; + if (pool == INGRESS_LOSSLESS_PG_POOL_NAME) + { + /* + * "dontUpdatPoolToDb" is calculated for ingress_lossless_pool according to following rules: + * Don't update | pool size | SHP enabled by size | SHP enabled by over subscribe ratio + * True | Dynamic | Any | Any + * False | Static | True | Any + * True | Static | False | True + * False | Static | False | False + */ + bool willSHPBeEnabledBySize = isNonZero(newSHPSize); + if (newSHPSize != m_configuredSharedHeadroomPoolSize) + { + bool isSHPEnabledBySize = isNonZero(m_configuredSharedHeadroomPoolSize); + + m_configuredSharedHeadroomPoolSize = newSHPSize; + refreshSharedHeadroomPool(false, isSHPEnabledBySize != 
willSHPBeEnabledBySize); + } + else if (!newSHPSize.empty()) + { + SWSS_LOG_INFO("Shared headroom pool size updated without change (new %s vs current %s), skipped", newSHPSize.c_str(), m_configuredSharedHeadroomPoolSize.c_str()); + } + + if (!willSHPBeEnabledBySize) + { + // Don't need to program APPL_DB if shared headroom pool is enabled + dontUpdatePoolToDb |= isNonZero(m_overSubscribeRatio); + } + } + else if (isNonZero(newSHPSize)) + { + SWSS_LOG_ERROR("Field xoff is supported for %s only, but got for %s, ignored", INGRESS_LOSSLESS_PG_POOL_NAME, pool.c_str()); + } + + if (!dontUpdatePoolToDb) { m_applBufferPoolTable.set(pool, fvVector); m_stateBufferPoolTable.set(pool, fvVector); @@ -1299,12 +1550,12 @@ task_process_status BufferMgrDynamic::handleBufferProfileTable(KeyOpFieldsValues profileApp.state = PROFILE_NORMAL; SWSS_LOG_NOTICE("BUFFER_PROFILE %s is dynamic calculation so it won't be deployed to APPL_DB until referenced by a port", profileName.c_str()); - doUpdateStaticProfileTask(profileApp); + doUpdateBufferProfileForDynamicTh(profileApp); } else { profileApp.state = PROFILE_NORMAL; - doUpdateStaticProfileTask(profileApp); + doUpdateBufferProfileForSize(profileApp); SWSS_LOG_NOTICE("BUFFER_PROFILE %s has been inserted into APPL_DB", profileName.c_str()); SWSS_LOG_DEBUG("BUFFER_PROFILE %s for headroom override has been stored internally: [pool %s xon %s xoff %s size %s]", profileName.c_str(), diff --git a/cfgmgr/buffermgrdyn.h b/cfgmgr/buffermgrdyn.h index 941491e7b6b1..f11bd86201f8 100644 --- a/cfgmgr/buffermgrdyn.h +++ b/cfgmgr/buffermgrdyn.h @@ -47,6 +47,14 @@ typedef struct { bool static_configured; bool ingress; bool lossless; + + // fields representing parameters by which the headroom is calculated + std::string speed; + std::string cable_length; + std::string port_mtu; + std::string gearbox_model; + + // APPL_DB.BUFFER_PROFILE fields std::string name; std::string size; std::string xon; @@ -124,6 +132,8 @@ class BufferMgrDynamic : public Orch bool 
m_portInitDone; bool m_firstTimeCalculateBufferPool; + std::string m_configuredSharedHeadroomPoolSize; + std::shared_ptr m_applDb = nullptr; SelectableTimer *m_buffermgrPeriodtimer = nullptr; @@ -191,6 +201,8 @@ class BufferMgrDynamic : public Orch unsigned long m_mmuSizeNumber; std::string m_defaultThreshold; + std::string m_overSubscribeRatio; + // Initializers void initTableHandlerMap(); void parseGearboxInfo(std::shared_ptr> gearboxInfo); @@ -202,6 +214,10 @@ class BufferMgrDynamic : public Orch std::string parseObjectNameFromKey(const std::string &key, size_t pos/* = 1*/); std::string parseObjectNameFromReference(const std::string &reference); std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model); + inline bool isNonZero(const std::string &value) const + { + return !value.empty() && value != "0"; + } // APPL_DB table operations void updateBufferPoolToDb(const std::string &name, const buffer_pool_t &pool); @@ -209,19 +225,21 @@ class BufferMgrDynamic : public Orch void updateBufferPgToDb(const std::string &key, const std::string &profile, bool add); // Meta flows - void calculateHeadroomSize(const std::string &speed, const std::string &cable, const std::string &port_mtu, const std::string &gearbox_model, buffer_profile_t &headroom); + void calculateHeadroomSize(buffer_profile_t &headroom); void checkSharedBufferPoolSize(); void recalculateSharedBufferPool(); task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, std::string &profile_name); void releaseProfile(const std::string &profile_name); bool isHeadroomResourceValid(const std::string &port, const buffer_profile_t &profile, const std::string &new_pg); + void refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size); // Main flows 
task_process_status refreshPriorityGroupsForPort(const std::string &port, const std::string &speed, const std::string &cable_length, const std::string &mtu, const std::string &exactly_matched_key); task_process_status doUpdatePgTask(const std::string &pg_key, const std::string &port); task_process_status doRemovePgTask(const std::string &pg_key, const std::string &port); task_process_status doAdminStatusTask(const std::string port, const std::string adminStatus); - task_process_status doUpdateStaticProfileTask(buffer_profile_t &profile); + task_process_status doUpdateBufferProfileForDynamicTh(buffer_profile_t &profile); + task_process_status doUpdateBufferProfileForSize(buffer_profile_t &profile, bool update_pool_size); // Table update handlers task_process_status handleBufferMaxParam(KeyOpFieldsValuesTuple &t); diff --git a/tests/test_buffer_dynamic.py b/tests/test_buffer_dynamic.py index b0af416c41a4..932247de37d5 100644 --- a/tests/test_buffer_dynamic.py +++ b/tests/test_buffer_dynamic.py @@ -408,3 +408,95 @@ def test_nonDefaultAlpha(self, dvs, testlog): # clear configuration self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|3-4') self.config_db.delete_entry('BUFFER_PROFILE', 'non-default-dynamic') + + def test_sharedHeadroomPool(self, dvs, testlog): + self.setup_db(dvs) + + # configure lossless PG 3-4 on interface and start up the interface + self.config_db.update_entry('BUFFER_PG', 'Ethernet0|3-4', {'profile': 'NULL'}) + dvs.runcmd('config interface startup Ethernet0') + + expectedProfile = self.make_lossless_profile_name(self.originalSpeed, self.originalCableLen) + self.app_db.wait_for_entry("BUFFER_PROFILE_TABLE", expectedProfile) + self.app_db.wait_for_field_match("BUFFER_PG_TABLE", "Ethernet0:3-4", {"profile": "[BUFFER_PROFILE_TABLE:" + expectedProfile + "]"}) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + profileInApplDb = self.app_db.get_entry('BUFFER_PROFILE_TABLE', expectedProfile) + + # enable shared headroom pool by configuring over 
subscribe ratio + default_lossless_buffer_parameter = self.config_db.get_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE') + over_subscribe_ratio = default_lossless_buffer_parameter.get('over_subscribe_ratio') + assert not over_subscribe_ratio or over_subscribe_ratio == '0', "Over subscribe ratio isn't 0" + + # config over subscribe ratio to 2 + default_lossless_buffer_parameter['over_subscribe_ratio'] = '2' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + + # check buffer profile: xoff should be removed from size + profileInApplDb['size'] = profileInApplDb['xon'] + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # check ingress_lossless_pool between appldb and asicdb + # there are only two lossless PGs configured on one port. + # hence the shared headroom pool size should be pg xoff * 2 / over subscribe ratio (2) = xoff. 
+ ingress_lossless_pool_in_appldb = self.app_db.get_entry('BUFFER_POOL_TABLE', 'ingress_lossless_pool') + shp_size = profileInApplDb['xoff'] + ingress_lossless_pool_in_appldb['xoff'] = shp_size + # toggle shared headroom pool, it requires some time to update pools + time.sleep(20) + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb = self.asic_db.get_entry('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_PROFILE', self.ingress_lossless_pool_oid) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + + # config shared headroom pool size + shp_size = '204800' + ingress_lossless_pool_in_configdb = self.config_db.get_entry('BUFFER_POOL', 'ingress_lossless_pool') + ingress_lossless_pool_in_configdb['xoff'] = shp_size + self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', ingress_lossless_pool_in_configdb) + # make sure the size is still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # config over subscribe ratio to 4 + default_lossless_buffer_parameter['over_subscribe_ratio'] = '4' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + # shp size wins in case both size and over subscribe ratio is configured + ingress_lossless_pool_in_appldb['xoff'] = shp_size + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is 
still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove size configuration, new over subscribe ratio takes effect + ingress_lossless_pool_in_configdb['xoff'] = '0' + self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', ingress_lossless_pool_in_configdb) + # shp size: pg xoff * 2 / over subscribe ratio (4) = pg xoff / 2 + shp_size = str(int(int(profileInApplDb['xoff']) / 2)) + time.sleep(30) + ingress_lossless_pool_in_appldb['xoff'] = shp_size + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove over subscribe ratio configuration + default_lossless_buffer_parameter['over_subscribe_ratio'] = '0' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + # check whether shp size has been removed from both asic db and appl db + ingress_lossless_pool_in_appldb['xoff'] = '0' + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = '0' + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is equal to xon + xoff in the profile + profileInApplDb['size'] = str(int(profileInApplDb['xon']) + int(profileInApplDb['xoff'])) + 
self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove lossless PG 3-4 on interface + self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|3-4') + dvs.runcmd('config interface shutdown Ethernet0')