Enforce the order when the shared headroom pool is enabled (sonic-net#2699)

**What I did**

Enforce the order of operations when the shared headroom pool is enabled.

**Why I did it**

The current flow to enable the shared headroom pool is:
1. Configure the shared headroom pool size or over-subscribe ratio.
2. Update the lossless buffer profiles with `xon == size`.
3. Calculate and update the shared headroom pool size.

In step 2, the lossless buffer profiles are updated to values that assume the shared headroom pool is already enabled, but the pool is not actually enabled until step 3, so steps 2 and 3 are inconsistent with each other. This PR guarantees the order.

The new flow is:
1. A user configures the shared headroom pool size or over-subscribe ratio.
2. The dynamic buffer manager invokes the vendor-specific Lua plugin to calculate the shared headroom pool size.
    - This is the step introduced in this PR, which guarantees the shared headroom pool is enabled in advance.
    - On the Nvidia platform, a non-zero shared headroom pool size is returned at this stage if the user has configured the over-subscribe ratio.
3. If a non-zero shared headroom pool size is returned, the dynamic buffer manager pushes it to APPL_DB.ingress_lossless_pool and blocks until the value has been copied to APPL_STATE_DB.ingress_lossless_pool, which indicates that the buffer orchagent has finished handling it (see the sketch after this list).
4. The buffer manager updates the lossless buffer profiles.
5. The buffer manager invokes the Lua plugin to calculate the shared headroom pool size.
6. The flow continues as normal.
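
To make steps 2–3 concrete, the sketch below paraphrases the new wait-for-confirmation check. It is a simplified illustration, not the PR's literal code: the function name `sharedHeadroomPoolAppliedInSai`, the `isNonZero` helper, and the hard-coded `ingress_lossless_pool` key are stand-ins for the corresponding pieces in cfgmgr/buffermgrdyn.cpp.

```cpp
#include "producerstatetable.h"   // swss::ProducerStateTable
#include "table.h"                // swss::Table
#include <string>

static bool isNonZero(const std::string &value)
{
    return !value.empty() && value != "0";
}

// Returns true once APPL_STATE_DB confirms that orchagent applied the
// xoff (shared headroom pool) size to SAI; false means "retry later".
bool sharedHeadroomPoolAppliedInSai(swss::ProducerStateTable &applPoolTable,
                                    swss::Table &applStatePoolTable,
                                    const std::string &calculatedXoff)
{
    if (!isNonZero(calculatedXoff))
    {
        return true;   // SHP not requested; nothing to wait for
    }

    // Step 3: make sure the pending xoff reaches APPL_DB ...
    applPoolTable.flush();

    // ... and only report success once orchagent has copied it into
    // APPL_STATE_DB, i.e. after the size was applied to SAI.
    std::string appliedXoff;
    applStatePoolTable.hget("ingress_lossless_pool", "xoff", appliedXoff);
    return isNonZero(appliedXoff);
}
```

In the actual change this check runs inside the CONFIG_DB handlers and reports `task_need_retry` instead of returning a bool, so the task is re-executed until the confirmation arrives.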

**How I verified it**

Manual tests and regression tests.
stephenxs authored Apr 14, 2023
1 parent 5c43ed0 commit 81b5b4c
Showing 7 changed files with 90 additions and 5 deletions.
12 changes: 11 additions & 1 deletion cfgmgr/buffer_pool_mellanox.lua
@@ -324,7 +324,7 @@ for name in pairs(profiles) do
         size = size + lossypg_reserved
     end
     if size ~= 0 then
-        if shp_enabled and shp_size == 0 then
+        if shp_size == 0 then
             local xon = tonumber(redis.call('HGET', name, 'xon'))
             local xoff = tonumber(redis.call('HGET', name, 'xoff'))
             if xon ~= nil and xoff ~= nil and xon + xoff > size then
@@ -346,6 +346,12 @@ accumulative_occupied_buffer = accumulative_occupied_buffer + lossypg_extra_for_

 -- Accumulate sizes for private headrooms
 local accumulative_private_headroom = 0
+local force_enable_shp = false
+if accumulative_xoff > 0 and shp_enabled ~= true then
+    force_enable_shp = true
+    shp_size = 655360
+    shp_enabled = true
+end
 if shp_enabled then
     accumulative_private_headroom = lossless_port_count * private_headroom
     accumulative_occupied_buffer = accumulative_occupied_buffer + accumulative_private_headroom
@@ -391,6 +397,9 @@ end

 if shp_enabled and shp_size == 0 then
     shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio)
+    if shp_size == 0 then
+        shp_size = 655360
+    end
 end

 local pool_size
@@ -432,6 +441,7 @@ table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
 if shp_enabled then
     table.insert(result, "debug:accumulative_private_headroom:" .. accumulative_private_headroom)
     table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
+    table.insert(result, "debug:force enabled shp:" .. tostring(force_enable_shp))
 end
 table.insert(result, "debug:accumulative_mgmt_pg:" .. accumulative_management_pg)
 table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
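
For reference, the hunks above boil down to one sizing rule plus a fixed fallback. With an over-subscribe ratio configured, the plugin computes

$$\mathrm{shp\_size} = \left\lceil \frac{\mathrm{accumulative\_xoff}}{\mathrm{over\_subscribe\_ratio}} \right\rceil$$

and substitutes the fixed size 655360 whenever the result is 0, or as the provisional size when lossless headroom already exists ($\mathrm{accumulative\_xoff} > 0$) before SHP has been enabled. As an illustrative example (numbers not from the PR): an accumulative xoff of 1,572,864 bytes with an over-subscribe ratio of 4 yields an shp_size of 393,216 bytes.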
4 changes: 3 additions & 1 deletion cfgmgr/buffermgrd.cpp
@@ -189,6 +189,8 @@ int main(int argc, char **argv)
         WarmStart::initialize("buffermgrd", "swss");
         WarmStart::checkWarmStart("buffermgrd", "swss");

+        DBConnector applStateDb("APPL_STATE_DB", 0);
+
         vector<TableConnector> buffer_table_connectors = {
             TableConnector(&cfgDb, CFG_PORT_TABLE_NAME),
             TableConnector(&cfgDb, CFG_PORT_CABLE_LEN_TABLE_NAME),
@@ -202,7 +204,7 @@ int main(int argc, char **argv)
             TableConnector(&stateDb, STATE_BUFFER_MAXIMUM_VALUE_TABLE),
             TableConnector(&stateDb, STATE_PORT_TABLE_NAME)
         };
-        cfgOrchList.emplace_back(new BufferMgrDynamic(&cfgDb, &stateDb, &applDb, buffer_table_connectors, peripherial_table_ptr, zero_profiles_ptr));
+        cfgOrchList.emplace_back(new BufferMgrDynamic(&cfgDb, &stateDb, &applDb, &applStateDb, buffer_table_connectors, peripherial_table_ptr, zero_profiles_ptr));
     }
     else if (!pg_lookup_file.empty())
     {
36 changes: 35 additions & 1 deletion cfgmgr/buffermgrdyn.cpp
@@ -26,7 +26,7 @@
 using namespace std;
 using namespace swss;

-BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBConnector *applDb, const vector<TableConnector> &tables, shared_ptr<vector<KeyOpFieldsValuesTuple>> gearboxInfo, shared_ptr<vector<KeyOpFieldsValuesTuple>> zeroProfilesInfo) :
+BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBConnector *applDb, DBConnector *applStateDb, const vector<TableConnector> &tables, shared_ptr<vector<KeyOpFieldsValuesTuple>> gearboxInfo, shared_ptr<vector<KeyOpFieldsValuesTuple>> zeroProfilesInfo) :
     Orch(tables),
     m_platform(),
     m_bufferDirections{BUFFER_INGRESS, BUFFER_EGRESS},
@@ -38,6 +38,7 @@ BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBC
     m_cfgDefaultLosslessBufferParam(cfgDb, CFG_DEFAULT_LOSSLESS_BUFFER_PARAMETER),
     m_cfgDeviceMetaDataTable(cfgDb, CFG_DEVICE_METADATA_TABLE_NAME),
     m_applBufferPoolTable(applDb, APP_BUFFER_POOL_TABLE_NAME),
+    m_applStateBufferPoolTable(applStateDb, APP_BUFFER_POOL_TABLE_NAME),
     m_applBufferProfileTable(applDb, APP_BUFFER_PROFILE_TABLE_NAME),
     m_applBufferObjectTables{ProducerStateTable(applDb, APP_BUFFER_PG_TABLE_NAME), ProducerStateTable(applDb, APP_BUFFER_QUEUE_TABLE_NAME)},
     m_applBufferProfileListTables{ProducerStateTable(applDb, APP_BUFFER_PORT_INGRESS_PROFILE_LIST_NAME), ProducerStateTable(applDb, APP_BUFFER_PORT_EGRESS_PROFILE_LIST_NAME)},
@@ -1960,6 +1961,13 @@ task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFiel
     {
         bool isSHPEnabled = isNonZero(m_overSubscribeRatio);
         bool willSHPBeEnabled = isNonZero(newRatio);
+        if (m_portInitDone && (!isSHPEnabled) && willSHPBeEnabled)
+        {
+            if (!isSharedHeadroomPoolEnabledInSai())
+            {
+                return task_process_status::task_need_retry;
+            }
+        }
         SWSS_LOG_INFO("Recalculate shared buffer pool size due to over subscribe ratio has been updated from %s to %s",
                       m_overSubscribeRatio.c_str(), newRatio.c_str());
         m_overSubscribeRatio = newRatio;
@@ -1968,6 +1976,24 @@ task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFiel

     return task_process_status::task_success;
 }
+bool BufferMgrDynamic::isSharedHeadroomPoolEnabledInSai()
+{
+    string xoff;
+    recalculateSharedBufferPool();
+    if (!isNonZero(m_bufferPoolLookup[INGRESS_LOSSLESS_PG_POOL_NAME].xoff))
+    {
+        return true;
+    }
+    m_applBufferPoolTable.flush();
+    m_applStateBufferPoolTable.hget(INGRESS_LOSSLESS_PG_POOL_NAME, "xoff", xoff);
+    if (!isNonZero(xoff))
+    {
+        SWSS_LOG_INFO("Shared headroom pool is enabled but has not been applied to SAI, retrying");
+        return false;
+    }
+
+    return true;
+}

 task_process_status BufferMgrDynamic::handleCableLenTable(KeyOpFieldsValuesTuple &tuple)
 {
@@ -2416,6 +2442,14 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup
     {
         bool isSHPEnabledBySize = isNonZero(m_configuredSharedHeadroomPoolSize);

+        if (m_portInitDone && (!isSHPEnabledBySize) && willSHPBeEnabledBySize)
+        {
+            if (!isSharedHeadroomPoolEnabledInSai())
+            {
+                return task_process_status::task_need_retry;
+            }
+        }
+
         m_configuredSharedHeadroomPoolSize = newSHPSize;
         refreshSharedHeadroomPool(false, isSHPEnabledBySize != willSHPBeEnabledBySize);
     }
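
Both `task_need_retry` call sites above lean on the Orch framework's retry semantics: a handler that cannot complete yet leaves its task queued, and it is attempted again on the next `doTask` round. A minimal, self-contained sketch of that pattern follows; the queue type, `TaskStatus` enum, and `doTaskRound` name are illustrative stand-ins, not swss's actual `Consumer`/`m_toSync` machinery.

```cpp
#include <cstddef>
#include <deque>
#include <functional>
#include <utility>

enum class TaskStatus { Success, NeedRetry };

// One processing round: each pending task runs at most once; a task that
// reports NeedRetry is re-queued and retried on the next round. This is
// how the SHP handlers keep deferring until orchagent has programmed SAI.
void doTaskRound(std::deque<std::function<TaskStatus()>> &pending)
{
    std::size_t n = pending.size();
    while (n-- > 0)
    {
        auto task = std::move(pending.front());
        pending.pop_front();
        if (task() == TaskStatus::NeedRetry)
        {
            pending.push_back(std::move(task));
        }
    }
}
```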
4 changes: 3 additions & 1 deletion cfgmgr/buffermgrdyn.h
@@ -147,7 +147,7 @@ typedef std::map<std::string, std::string> gearbox_delay_t;
 class BufferMgrDynamic : public Orch
 {
 public:
-    BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBConnector *applDb, const std::vector<TableConnector> &tables, std::shared_ptr<std::vector<KeyOpFieldsValuesTuple>> gearboxInfo, std::shared_ptr<std::vector<KeyOpFieldsValuesTuple>> zeroProfilesInfo);
+    BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBConnector *applDb, DBConnector *applStateDb, const std::vector<TableConnector> &tables, std::shared_ptr<std::vector<KeyOpFieldsValuesTuple>> gearboxInfo, std::shared_ptr<std::vector<KeyOpFieldsValuesTuple>> zeroProfilesInfo);
     using Orch::doTask;

 private:
@@ -204,6 +204,7 @@ class BufferMgrDynamic : public Orch

     // BUFFER_POOL table and cache
     ProducerStateTable m_applBufferPoolTable;
+    Table m_applStateBufferPoolTable;
     Table m_stateBufferPoolTable;
     buffer_pool_lookup_t m_bufferPoolLookup;
@@ -294,6 +295,7 @@ class BufferMgrDynamic : public Orch
     task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, long lane_count, std::string &profile_name);
     void releaseProfile(const std::string &profile_name);
     bool isHeadroomResourceValid(const std::string &port, const buffer_profile_t &profile, const std::string &new_pg);
+    bool isSharedHeadroomPoolEnabledInSai();
     void refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size);
     task_process_status checkBufferProfileDirection(const std::string &profiles, buffer_direction_t dir);
     std::string constructZeroProfileListFromNormalProfileList(const std::string &normalProfileList, const std::string &port);
14 changes: 14 additions & 0 deletions orchagent/bufferorch.cpp
@@ -372,6 +372,7 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
     string map_type_name = APP_BUFFER_POOL_TABLE_NAME;
     string object_name = kfvKey(tuple);
     string op = kfvOp(tuple);
+    string xoff;

     SWSS_LOG_DEBUG("object name:%s", object_name.c_str());
     if (m_buffer_type_maps[map_type_name]->find(object_name) != m_buffer_type_maps[map_type_name]->end())
@@ -466,6 +467,7 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
                 attr.value.u64 = (uint64_t)stoul(value);
                 attr.id = SAI_BUFFER_POOL_ATTR_XOFF_SIZE;
                 attribs.push_back(attr);
+                xoff = value;
             }
             else
             {
@@ -518,6 +520,15 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
             // "FLEX_COUNTER_STATUS"
             m_countersDb->hset(COUNTERS_BUFFER_POOL_NAME_MAP, object_name, sai_serialize_object_id(sai_object));
         }
+
+        // Only publish the result when shared headroom pool is enabled and it has been successfully applied to SAI
+        if (!xoff.empty())
+        {
+            vector<FieldValueTuple> fvs;
+            fvs.emplace_back("xoff", xoff);
+            SWSS_LOG_INFO("Publishing the result after applying the shared headroom pool size %s to SAI", xoff.c_str());
+            m_publisher.publish(APP_BUFFER_POOL_TABLE_NAME, object_name, fvs, ReturnCode(SAI_STATUS_SUCCESS), true);
+        }
     }
     else if (op == DEL_COMMAND)
     {
Expand Down Expand Up @@ -548,6 +559,9 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
auto it_to_delete = (m_buffer_type_maps[map_type_name])->find(object_name);
(m_buffer_type_maps[map_type_name])->erase(it_to_delete);
m_countersDb->hdel(COUNTERS_BUFFER_POOL_NAME_MAP, object_name);

vector<FieldValueTuple> fvs;
m_publisher.publish(APP_BUFFER_POOL_TABLE_NAME, object_name, fvs, ReturnCode(SAI_STATUS_SUCCESS), true);
}
else
{
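
The `m_publisher.publish(..., true)` calls above are the other half of the handshake: on the SET path they mirror the applied `xoff` into APPL_STATE_DB under the same table and key, and on the DEL path they publish an empty field list, presumably clearing the entry. The snippet below is a hedged sketch of what buffermgrd can then observe; it assumes the standard swss `Table` API and that `APP_BUFFER_POOL_TABLE_NAME` expands to `BUFFER_POOL_TABLE`.

```cpp
#include "dbconnector.h"   // swss::DBConnector
#include "table.h"         // swss::Table
#include <iostream>
#include <string>

int main()
{
    swss::DBConnector applStateDb("APPL_STATE_DB", 0);
    swss::Table poolStateTable(&applStateDb, "BUFFER_POOL_TABLE");

    // Present and non-zero only after orchagent applied the xoff size to SAI.
    std::string xoff;
    if (poolStateTable.hget("ingress_lossless_pool", "xoff", xoff) && xoff != "0")
        std::cout << "shared headroom pool applied, xoff=" << xoff << "\n";
    else
        std::cout << "not applied yet; buffermgrd would retry\n";
    return 0;
}
```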
3 changes: 2 additions & 1 deletion tests/mock_tests/buffermgrdyn_ut.cpp
@@ -22,6 +22,7 @@ namespace buffermgrdyn_test
     shared_ptr<swss::DBConnector> m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
     shared_ptr<swss::DBConnector> m_config_db = make_shared<swss::DBConnector>("CONFIG_DB", 0);
     shared_ptr<swss::DBConnector> m_state_db = make_shared<swss::DBConnector>("STATE_DB", 0);
+    shared_ptr<swss::DBConnector> m_app_state_db = make_shared<swss::DBConnector>("APPL_STATE_DB", 0);

     BufferMgrDynamic *m_dynamicBuffer;
     SelectableTimer m_selectableTable(timespec({ .tv_sec = BUFFERMGR_TIMER_PERIOD, .tv_nsec = 0 }), 0);
@@ -180,7 +181,7 @@ namespace buffermgrdyn_test
             TableConnector(m_state_db.get(), STATE_PORT_TABLE_NAME)
         };

-        m_dynamicBuffer = new BufferMgrDynamic(m_config_db.get(), m_state_db.get(), m_app_db.get(), buffer_table_connectors, nullptr, zero_profile);
+        m_dynamicBuffer = new BufferMgrDynamic(m_config_db.get(), m_state_db.get(), m_app_db.get(), m_app_state_db.get(), buffer_table_connectors, nullptr, zero_profile);
     }

     void InitPort(const string &port="Ethernet0", const string &admin_status="up")
22 changes: 22 additions & 0 deletions tests/mock_tests/bufferorch_ut.cpp
@@ -7,9 +7,11 @@
 #include "ut_helper.h"
 #include "mock_orchagent_main.h"
 #include "mock_table.h"
+#include "mock_response_publisher.h"

 extern string gMySwitchType;

+extern std::unique_ptr<MockResponsePublisher> gMockResponsePublisher;

 namespace bufferorch_test
 {
@@ -19,6 +21,7 @@
     sai_port_api_t *pold_sai_port_api;

     shared_ptr<swss::DBConnector> m_app_db;
+    shared_ptr<swss::DBConnector> m_app_state_db;
     shared_ptr<swss::DBConnector> m_config_db;
     shared_ptr<swss::DBConnector> m_state_db;
     shared_ptr<swss::DBConnector> m_chassis_app_db;
@@ -113,6 +116,7 @@ namespace bufferorch_test
             m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
             m_config_db = make_shared<swss::DBConnector>("CONFIG_DB", 0);
             m_state_db = make_shared<swss::DBConnector>("STATE_DB", 0);
+            m_app_state_db = make_shared<swss::DBConnector>("APPL_STATE_DB", 0);
             if(gMySwitchType == "voq")
                 m_chassis_app_db = make_shared<swss::DBConnector>("CHASSIS_APP_DB", 0);

@@ -317,6 +321,24 @@ namespace bufferorch_test
         }
     };

+    TEST_F(BufferOrchTest, BufferOrchTestSharedHeadroomPool)
+    {
+        gMockResponsePublisher = std::make_unique<MockResponsePublisher>();
+
+        Table bufferPoolTable = Table(m_app_db.get(), APP_BUFFER_POOL_TABLE_NAME);
+        Table bufferPoolStateTable = Table(m_app_state_db.get(), APP_BUFFER_POOL_TABLE_NAME);
+
+        bufferPoolTable.set("ingress_lossless_pool",
+                            {
+                                {"xoff", "10240"}
+                            });
+        gBufferOrch->addExistingData(&bufferPoolTable);
+        EXPECT_CALL(*gMockResponsePublisher, publish(APP_BUFFER_POOL_TABLE_NAME, "ingress_lossless_pool", std::vector<FieldValueTuple>{{"xoff", "10240"}}, ReturnCode(SAI_STATUS_SUCCESS), true)).Times(1);
+        static_cast<Orch *>(gBufferOrch)->doTask();
+
+        gMockResponsePublisher.reset();
+    }
+
     TEST_F(BufferOrchTest, BufferOrchTestBufferPgReferencingObjRemoveThenAdd)
     {
         vector<string> ts;
