Fix issue: sometimes PFC WD unable to create zero buffer pool (#2164)
What I did
Fix an issue where PFC WD is sometimes unable to create its zero buffer pool.
On some platforms, ingress/egress zero buffer profiles are applied to the PGs and queues that are under PFC storm. A zero buffer profile is created on top of a zero buffer pool; however, creating that zero buffer pool sometimes fails because too many buffer pools already exist in the system.
In some cases a zero buffer pool already exists in the system for reclaiming reserved buffer. When it does, we can leverage it to create the zero buffer profiles for PFC WD.

Why I did it
Fix the issue by sharing the zero buffer pool between PFC WD and the buffer orchagent.

How I verified it
Manual test
Run the PFC WD test and the PFC WD warm reboot test
Run the unit tests

Details if related
The detailed flow is as follows:

PFC storm detected:
- If there is a zero buffer pool in PFC WD's cache, create the zero buffer profile based on it.
- Otherwise, fetch the zero buffer pool from the buffer orchagent:
  - If one is returned, create the zero buffer profile based on it.
  - Otherwise, create a zero buffer pool and notify the buffer orchagent about the new pool.
- In both cases, PFC WD notifies the buffer orchagent to increase the reference count of the zero buffer pool (see the sketch after this list).
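A rough, self-contained illustration of that ordering is below. It is not the committed code: BufferOrchStub, acquireZeroPool and the dummy OID 0x2600 are stand-ins introduced only for this sketch, while getZeroBufferPool, setZeroBufferPool and lockZeroBufferPool mirror the new BufferOrch interface shown in the diff further down.

    #include <cstdint>

    using sai_object_id_t = std::uint64_t;
    constexpr sai_object_id_t SAI_NULL_OBJECT_ID = 0;

    // Minimal stand-in for the new BufferOrch interface introduced by this commit.
    struct BufferOrchStub
    {
        sai_object_id_t zeroPool = SAI_NULL_OBJECT_ID;
        int refCount = 0;

        sai_object_id_t getZeroBufferPool(bool /*ingress*/) { return zeroPool; }
        void setZeroBufferPool(bool /*ingress*/, sai_object_id_t pool) { zeroPool = pool; }
        void lockZeroBufferPool(bool /*ingress*/) { ++refCount; }
    };

    // Order of operations when a PFC storm is detected and a zero profile is needed.
    sai_object_id_t acquireZeroPool(BufferOrchStub &orch, sai_object_id_t &cachedPool, bool ingress)
    {
        if (cachedPool != SAI_NULL_OBJECT_ID)
        {
            return cachedPool;                            // 1. reuse PFC WD's own cached pool
        }

        cachedPool = orch.getZeroBufferPool(ingress);     // 2. try the pool owned by buffer orch
        if (cachedPool == SAI_NULL_OBJECT_ID)
        {
            cachedPool = 0x2600;                          // 3. stand-in for create_buffer_pool()
            orch.setZeroBufferPool(ingress, cachedPool);  //    hand the new pool over to buffer orch
        }

        orch.lockZeroBufferPool(ingress);                 // in both cases, bump the shared ref count
        return cachedPool;
    }

    int main()
    {
        BufferOrchStub orch;
        sai_object_id_t pfcWdCache = SAI_NULL_OBJECT_ID;
        acquireZeroPool(orch, pfcWdCache, true);   // first storm: creates the pool, refCount == 1
        acquireZeroPool(orch, pfcWdCache, true);   // later storms: served from the PFC WD cache
        return 0;
    }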

Buffer orchagent:
- When creating the zero buffer pool:
  - Check whether one already exists. If yes, skip the SAI API create_buffer_pool.
  - Increase the reference count.
- Before removing the zero buffer pool, decrease the reference count and check it. If it is still non-zero after being decreased, skip the SAI API remove_buffer_pool.
- When PFC WD releases its reference, remove the zero buffer pool if the reference count drops to zero (see the sketch after this list).
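The reference-counting contract can be modelled with a self-contained toy like the one below. ZeroPoolOwner, getOrCreate, release and the dummy OID 0x1234 exist only in this sketch; in the commit, the real logic lives in processBufferPool(), lockZeroBufferPool() and unlockZeroBufferPool().

    #include <cstdio>

    using sai_object_id_t = unsigned long;
    constexpr sai_object_id_t SAI_NULL_OBJECT_ID = 0;

    // Toy model of the shared zero-pool reference counting on the buffer orchagent side.
    struct ZeroPoolOwner
    {
        sai_object_id_t pool = SAI_NULL_OBJECT_ID;
        int refCount = 0;

        // Called when the zero pool is configured (SET) or when PFC WD needs it.
        sai_object_id_t getOrCreate()
        {
            if (pool == SAI_NULL_OBJECT_ID)
            {
                pool = 0x1234;                                    // stand-in for create_buffer_pool()
                std::printf("create_buffer_pool -> 0x%lx\n", pool);
            }
            refCount++;                                           // every consumer bumps the count
            return pool;
        }

        // Called when the pool is removed from config (DEL) or when a PFC storm ends.
        void release()
        {
            if (--refCount > 0)
            {
                std::printf("still referenced (%d), skip remove_buffer_pool\n", refCount);
                return;
            }
            std::printf("remove_buffer_pool 0x%lx\n", pool);      // stand-in for remove_buffer_pool()
            pool = SAI_NULL_OBJECT_ID;
        }
    };

    int main()
    {
        ZeroPoolOwner orch;
        orch.getOrCreate();   // buffer orchagent creates ingress_zero_pool    (refCount 1)
        orch.getOrCreate();   // PFC WD reuses it during a storm               (refCount 2)
        orch.release();       // config removes the pool: 2 -> 1, SAI remove is skipped
        orch.release();       // the storm ends: 1 -> 0, the pool is removed via SAI
        return 0;
    }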

Notes
We do not leverage the object_reference_map infrastructure to track this dependency because:
- It assumes the dependency will eventually be removed once the referencing object is removed. That is not true in this scenario, because a PFC storm can last a relatively long time and even cross a warm reboot.
- The interfaces differ.

Signed-off-by: Stephen Sun <stephens@nvidia.com>
stephenxs authored Mar 16, 2022
1 parent 608acc3 commit ad65b0a
Showing 6 changed files with 531 additions and 52 deletions.
168 changes: 153 additions & 15 deletions orchagent/bufferorch.cpp
@@ -48,7 +48,11 @@ BufferOrch::BufferOrch(DBConnector *applDb, DBConnector *confDb, DBConnector *st
m_flexCounterTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_TABLE)),
m_flexCounterGroupTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_GROUP_TABLE)),
m_countersDb(new DBConnector("COUNTERS_DB", 0)),
m_stateBufferMaximumValueTable(stateDb, STATE_BUFFER_MAXIMUM_VALUE_TABLE)
m_stateBufferMaximumValueTable(stateDb, STATE_BUFFER_MAXIMUM_VALUE_TABLE),
m_ingressZeroBufferPool(SAI_NULL_OBJECT_ID),
m_egressZeroBufferPool(SAI_NULL_OBJECT_ID),
m_ingressZeroPoolRefCount(0),
m_egressZeroPoolRefCount(0)
{
SWSS_LOG_ENTER();
initTableHandlers();
@@ -310,6 +314,65 @@ const object_reference_map &BufferOrch::getBufferPoolNameOidMap(void)
return *m_buffer_type_maps[APP_BUFFER_POOL_TABLE_NAME];
}

void BufferOrch::lockZeroBufferPool(bool ingress)
{
if (ingress)
m_ingressZeroPoolRefCount++;
else
m_egressZeroPoolRefCount++;
}

void BufferOrch::unlockZeroBufferPool(bool ingress)
{
sai_object_id_t pool = SAI_NULL_OBJECT_ID;
if (ingress)
{
if (--m_ingressZeroPoolRefCount <= 0)
{
pool = m_ingressZeroBufferPool;
m_ingressZeroBufferPool = SAI_NULL_OBJECT_ID;
}
}
else
{
if (--m_egressZeroPoolRefCount <= 0)
{
pool = m_egressZeroBufferPool;
m_egressZeroBufferPool = SAI_NULL_OBJECT_ID;
}
}

if (pool != SAI_NULL_OBJECT_ID)
{
auto sai_status = sai_buffer_api->remove_buffer_pool(pool);
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to remove buffer pool, rv:%d", sai_status);
task_process_status handle_status = handleSaiRemoveStatus(SAI_API_BUFFER, sai_status);
if (handle_status != task_process_status::task_success)
{
return;
}
}
else
{
SWSS_LOG_NOTICE("Zero buffer pool has been successfully removed");
}
}
}

void BufferOrch::setZeroBufferPool(bool ingress, sai_object_id_t pool)
{
if (ingress)
{
m_ingressZeroBufferPool = pool;
}
else
{
m_egressZeroBufferPool = pool;
}
}

task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
{
SWSS_LOG_ENTER();
@@ -318,6 +381,8 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
string map_type_name = APP_BUFFER_POOL_TABLE_NAME;
string object_name = kfvKey(tuple);
string op = kfvOp(tuple);
sai_buffer_pool_type_t pool_direction = SAI_BUFFER_POOL_TYPE_INGRESS;
bool creating_zero_pool = false;

SWSS_LOG_DEBUG("object name:%s", object_name.c_str());
if (m_buffer_type_maps[map_type_name]->find(object_name) != m_buffer_type_maps[map_type_name]->end())
@@ -326,6 +391,17 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
SWSS_LOG_DEBUG("found existing object:%s of type:%s", object_name.c_str(), map_type_name.c_str());
}
SWSS_LOG_DEBUG("processing command:%s", op.c_str());
if (object_name == "ingress_zero_pool")
{
creating_zero_pool = true;
pool_direction = SAI_BUFFER_POOL_TYPE_INGRESS;
}
else if (object_name == "egress_zero_pool")
{
creating_zero_pool = true;
pool_direction = SAI_BUFFER_POOL_TYPE_EGRESS;
}

if (op == SET_COMMAND)
{
vector<sai_attribute_t> attribs;
@@ -372,6 +448,11 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
return task_process_status::task_invalid_entry;
}
attr.id = SAI_BUFFER_POOL_ATTR_TYPE;
if (creating_zero_pool && pool_direction != static_cast<sai_buffer_pool_type_t>(attr.value.u32))
{
SWSS_LOG_ERROR("Wrong pool direction for pool %s", object_name.c_str());
return task_process_status::task_invalid_entry;
}
attribs.push_back(attr);
}
else if (field == buffer_pool_mode_field_name)
@@ -437,18 +518,53 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
}
else
{
if (creating_zero_pool)
{
if (pool_direction == SAI_BUFFER_POOL_TYPE_INGRESS)
{
sai_object = m_ingressZeroBufferPool;
}
else if (pool_direction == SAI_BUFFER_POOL_TYPE_EGRESS)
{
sai_object = m_egressZeroBufferPool;
}
}

if (SAI_NULL_OBJECT_ID == sai_object)
{
sai_status = sai_buffer_api->create_buffer_pool(&sai_object, gSwitchId, (uint32_t)attribs.size(), attribs.data());
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to create buffer pool %s with type %s, rv:%d", object_name.c_str(), map_type_name.c_str(), sai_status);
task_process_status handle_status = handleSaiCreateStatus(SAI_API_BUFFER, sai_status);
if (handle_status != task_process_status::task_success)
{
return handle_status;
}
}

SWSS_LOG_NOTICE("Created buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
}
else
{
SWSS_LOG_NOTICE("No need to create buffer pool %s since it has been created", object_name.c_str());
}

if (creating_zero_pool)
{
if (pool_direction == SAI_BUFFER_POOL_TYPE_INGRESS)
{
m_ingressZeroPoolRefCount++;
m_ingressZeroBufferPool = sai_object;
}
else if (pool_direction == SAI_BUFFER_POOL_TYPE_EGRESS)
{
m_egressZeroPoolRefCount++;
m_egressZeroBufferPool = sai_object;
}
}

(*(m_buffer_type_maps[map_type_name]))[object_name].m_saiObjectId = sai_object;
SWSS_LOG_NOTICE("Created buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
// Here we take the PFC watchdog approach to update the COUNTERS_DB metadata (e.g., PFC_WD_DETECTION_TIME per queue)
// at initialization (creation and registration phase)
// Specifically, we push the buffer pool name to oid mapping upon the creation of the oid
@@ -470,18 +586,40 @@ task_process_status BufferOrch::processBufferPool(KeyOpFieldsValuesTuple &tuple)
if (SAI_NULL_OBJECT_ID != sai_object)
{
clearBufferPoolWatermarkCounterIdList(sai_object);
bool remove = true;
if (sai_object == m_ingressZeroBufferPool)
{
if (--m_ingressZeroPoolRefCount > 0)
remove = false;
else
m_ingressZeroBufferPool = SAI_NULL_OBJECT_ID;
}
else if (sai_object == m_egressZeroBufferPool)
{
if (--m_egressZeroPoolRefCount > 0)
remove = false;
else
m_egressZeroBufferPool = SAI_NULL_OBJECT_ID;
}
if (remove)
{
sai_status = sai_buffer_api->remove_buffer_pool(sai_object);
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to remove buffer pool %s with type %s, rv:%d", object_name.c_str(), map_type_name.c_str(), sai_status);
task_process_status handle_status = handleSaiRemoveStatus(SAI_API_BUFFER, sai_status);
if (handle_status != task_process_status::task_success)
{
return handle_status;
}
}
SWSS_LOG_NOTICE("Removed buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
}
else
{
SWSS_LOG_NOTICE("Will not remove buffer pool %s since it is still referenced", object_name.c_str());
}
}
auto it_to_delete = (m_buffer_type_maps[map_type_name])->find(object_name);
(m_buffer_type_maps[map_type_name])->erase(it_to_delete);
m_countersDb->hdel(COUNTERS_BUFFER_POOL_NAME_MAP, object_name);
13 changes: 13 additions & 0 deletions orchagent/bufferorch.h
@@ -37,6 +37,14 @@ class BufferOrch : public Orch
static type_map m_buffer_type_maps;
void generateBufferPoolWatermarkCounterIdList(void);
const object_reference_map &getBufferPoolNameOidMap(void);
sai_object_id_t getZeroBufferPool(bool ingress)
{
return ingress ? m_ingressZeroBufferPool : m_egressZeroBufferPool;
}

void lockZeroBufferPool(bool ingress);
void unlockZeroBufferPool(bool ingress);
void setZeroBufferPool(bool direction, sai_object_id_t pool);

private:
typedef task_process_status (BufferOrch::*buffer_table_handler)(KeyOpFieldsValuesTuple &tuple);
@@ -71,6 +79,11 @@ class BufferOrch : public Orch
unique_ptr<DBConnector> m_countersDb;

bool m_isBufferPoolWatermarkCounterIdListGenerated = false;

sai_object_id_t m_ingressZeroBufferPool;
sai_object_id_t m_egressZeroBufferPool;
int m_ingressZeroPoolRefCount;
int m_egressZeroPoolRefCount;
};
#endif /* SWSS_BUFFORCH_H */

86 changes: 60 additions & 26 deletions orchagent/pfcactionhandler.cpp
@@ -3,6 +3,7 @@
#include "logger.h"
#include "sai_serialize.h"
#include "portsorch.h"
#include "bufferorch.h"
#include <vector>
#include <inttypes.h>

@@ -26,6 +27,7 @@
extern sai_object_id_t gSwitchId;
extern PortsOrch *gPortsOrch;
extern AclOrch * gAclOrch;
extern BufferOrch *gBufferOrch;
extern sai_port_api_t *sai_port_api;
extern sai_queue_api_t *sai_queue_api;
extern sai_buffer_api_t *sai_buffer_api;
@@ -715,6 +717,25 @@ PfcWdZeroBufferHandler::ZeroBufferProfile &PfcWdZeroBufferHandler::ZeroBufferPro
return instance;
}

sai_object_id_t& PfcWdZeroBufferHandler::ZeroBufferProfile::getPool(bool ingress)
{
// If there is a cached zero buffer pool, just use it
// else fetch zero buffer pool from buffer orch
// If there is one, use it and increase the reference number.
// otherwise, just return NULL OID
// PfcWdZeroBufferHandler will create it later and notify buffer orch later
auto &poolId = ingress ? m_zeroIngressBufferPool : m_zeroEgressBufferPool;
if (poolId == SAI_NULL_OBJECT_ID)
{
poolId = gBufferOrch->getZeroBufferPool(ingress);
if (poolId != SAI_NULL_OBJECT_ID)
{
gBufferOrch->lockZeroBufferPool(ingress);
}
}
return poolId;
}

sai_object_id_t PfcWdZeroBufferHandler::ZeroBufferProfile::getZeroBufferProfile(bool ingress)
{
SWSS_LOG_ENTER();
@@ -733,29 +754,39 @@ void PfcWdZeroBufferHandler::ZeroBufferProfile::createZeroBufferProfile(bool ing

sai_attribute_t attr;
vector<sai_attribute_t> attribs;
sai_status_t status;

auto &poolId = getPool(ingress);

if (SAI_NULL_OBJECT_ID == poolId)
{
// Create zero pool
attr.id = SAI_BUFFER_POOL_ATTR_SIZE;
attr.value.u64 = 0;
attribs.push_back(attr);

attr.id = SAI_BUFFER_POOL_ATTR_TYPE;
attr.value.u32 = ingress ? SAI_BUFFER_POOL_TYPE_INGRESS : SAI_BUFFER_POOL_TYPE_EGRESS;
attribs.push_back(attr);

attr.id = SAI_BUFFER_POOL_ATTR_THRESHOLD_MODE;
attr.value.u32 = SAI_BUFFER_POOL_THRESHOLD_MODE_STATIC;
attribs.push_back(attr);

status = sai_buffer_api->create_buffer_pool(
&poolId,
gSwitchId,
static_cast<uint32_t>(attribs.size()),
attribs.data());
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create dynamic zero buffer pool for PFC WD: %d", status);
return;
}

// Pass the ownership to BufferOrch
gBufferOrch->setZeroBufferPool(ingress, poolId);
gBufferOrch->lockZeroBufferPool(ingress);
}

// Create zero profile
@@ -766,15 +797,15 @@ void PfcWdZeroBufferHandler::ZeroBufferProfile::createZeroBufferProfile(bool ing
attribs.push_back(attr);

attr.id = SAI_BUFFER_PROFILE_ATTR_THRESHOLD_MODE;
attr.value.u32 = SAI_BUFFER_PROFILE_THRESHOLD_MODE_STATIC;
attribs.push_back(attr);

attr.id = SAI_BUFFER_PROFILE_ATTR_BUFFER_SIZE;
attr.value.u64 = 0;
attribs.push_back(attr);

attr.id = SAI_BUFFER_PROFILE_ATTR_SHARED_STATIC_TH;
attr.value.s8 = 0;
attribs.push_back(attr);

status = sai_buffer_api->create_buffer_profile(
@@ -793,16 +824,19 @@ void PfcWdZeroBufferHandler::ZeroBufferProfile::destroyZeroBufferProfile(bool in
{
SWSS_LOG_ENTER();

if (getProfile(ingress) != SAI_NULL_OBJECT_ID)
{
sai_status_t status = sai_buffer_api->remove_buffer_profile(getProfile(ingress));
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to remove static zero buffer profile for PFC WD: %d", status);
return;
}
}

auto &pool = ingress ? m_zeroIngressBufferPool : m_zeroEgressBufferPool;
if (pool != SAI_NULL_OBJECT_ID)
{
gBufferOrch->unlockZeroBufferPool(ingress);
}
}
