Skip to content

Commit

Permalink
[pfcwd]: support BIG_RED_SWITCH mode (sonic-net#467)
Browse files Browse the repository at this point in the history
* [pfcwd]: enable BIG_RED_SWITCH mode

Signed-off-by: Sihui Han <sihan@microsoft.com>

* update as comments
  • Loading branch information
sihuihan88 authored Apr 13, 2018
1 parent 98c084a commit 046628b
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 31 deletions.
3 changes: 2 additions & 1 deletion orchagent/pfc_detect_broadcom.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ for i = n, 1, -1 do
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
if detection_time then
detection_time = tonumber(detection_time)
Expand Down
4 changes: 3 additions & 1 deletion orchagent/pfc_detect_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ for i = n, 1, -1 do
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then

local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
if detection_time then
detection_time = tonumber(detection_time)
Expand Down
3 changes: 2 additions & 1 deletion orchagent/pfc_restore.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ for i = n, 1, -1 do
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
local restoration_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME')
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
if not big_red_switch_mode and pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
restoration_time = tonumber(restoration_time)
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT')
if time_left == nil then
Expand Down
22 changes: 0 additions & 22 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,34 +37,12 @@ PfcWdActionHandler::PfcWdActionHandler(sai_object_id_t port, sai_object_id_t que
m_countersTable(countersTable)
{
SWSS_LOG_ENTER();

Port p;
if (!gPortsOrch->getPort(port, p))
{
SWSS_LOG_ERROR("Unknown port id 0x%lx", port);
}
else
{
m_portAlias = p.m_alias;
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
m_portAlias.c_str(),
m_queueId,
m_queue,
m_port);
}
}

PfcWdActionHandler::~PfcWdActionHandler(void)
{
SWSS_LOG_ENTER();

SWSS_LOG_NOTICE(
"PFC Watchdog storm restored on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
m_portAlias.c_str(),
m_queueId,
m_queue,
m_port);
}

void PfcWdActionHandler::initCounters(void)
Expand Down
212 changes: 207 additions & 5 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define PFC_WD_ACTION "action"
#define PFC_WD_DETECTION_TIME "detection_time"
#define PFC_WD_RESTORATION_TIME "restoration_time"
#define BIG_RED_SWITCH_FIELD "BIG_RED_SWITCH"

#define PFC_WD_DETECTION_TIME_MAX (5 * 1000)
#define PFC_WD_DETECTION_TIME_MIN 100
Expand Down Expand Up @@ -261,6 +262,8 @@ template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
const vector<FieldValueTuple>& data)
{
SWSS_LOG_ENTER();

if (key == PFC_WD_GLOBAL)
{
for (auto valuePair: data)
Expand All @@ -274,6 +277,11 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
fieldValues.emplace_back(POLL_INTERVAL_FIELD, value);
m_flexCounterGroupTable->set(PFC_WD_FLEX_COUNTER_GROUP, fieldValues);
}
else if (field == BIG_RED_SWITCH_FIELD)
{
SWSS_LOG_NOTICE("Recieve brs mode set, %s", value.c_str());
setBigRedSwitchMode(value);
}
}
}
else
Expand All @@ -282,6 +290,166 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
}
}

// Applies a BIG_RED_SWITCH configuration value.
//   value: "enable" switches the mode on, "disable" switches it off;
//          any other string is reported in the log and otherwise ignored.
template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::setBigRedSwitchMode(const string value)
{
    SWSS_LOG_ENTER();

    if (value == "disable")
    {
        disableBigRedSwitchMode();
        return;
    }

    if (value != "enable")
    {
        SWSS_LOG_NOTICE("Unsupported BIG_RED_SWITCH mode set input, please use enable or disable");
        return;
    }

    // Turning the mode on also shuts down the regular pfcwd handlers;
    // that is done inside enableBigRedSwitchMode().
    enableBigRedSwitchMode();
}

// Switches BIG_RED_SWITCH mode off.
//
// Clears the mode flag, commits the counters of and releases every handler
// created for the mode, and removes the BIG_RED_SWITCH_MODE field from the
// counters entry of every queue that enableBigRedSwitchMode() may have
// flagged. The pfc_detect/pfc_restore Lua scripts test that field and skip
// a queue for as long as it is present.
template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::disableBigRedSwitchMode()
{
    SWSS_LOG_ENTER();

    m_bigRedSwitchFlag = false;

    // Built once for all deletions below instead of once per queue.
    RedisClient redisClient(PfcWdOrch<DropHandler, ForwardHandler>::getCountersDb().get());

    auto clearBrsField = [&redisClient](sai_object_id_t queueId)
    {
        string countersKey = COUNTERS_TABLE ":" + sai_serialize_object_id(queueId);
        redisClient.hdel(countersKey, "BIG_RED_SWITCH_MODE");
    };

    // Disable pfcwdaction handler on each queue if exists.
    for (auto &entry : m_brsEntryMap)
    {
        if (entry.second.handler != nullptr)
        {
            SWSS_LOG_NOTICE(
                "PFC Watchdog BIG_RED_SWITCH mode disabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
                entry.second.portAlias.c_str(),
                entry.second.index,
                entry.first,
                entry.second.portId);

            entry.second.handler->commitCounters();
            entry.second.handler = nullptr;
        }

        clearBrsField(entry.first);
    }

    // enableBigRedSwitchMode() also flags queues that are registered in
    // m_entryMap while their PFC bit is clear. Those queues never get an
    // m_brsEntryMap entry, so the loop above does not reach them; clear
    // them here, otherwise they would stay flagged after the mode is off.
    for (const auto &entry : m_entryMap)
    {
        if (m_brsEntryMap.find(entry.first) == m_brsEntryMap.end())
        {
            clearBrsField(entry.first);
        }
    }

    m_brsEntryMap.clear();
}

// Switches BIG_RED_SWITCH mode on.
//
// Steps, in order:
//   1. Read the PFC mask of every physical port. If any read fails the
//      function logs the error and returns before changing any state, so
//      no half-enabled mode is left behind.
//   2. Set the mode flag and write BIG_RED_SWITCH_MODE=enable into the
//      counters table for every queue that has its PFC bit set or is
//      already registered in m_entryMap.
//   3. Commit the counters of and release every existing handler in
//      m_entryMap.
//   4. Create a DropHandler (tracked in m_brsEntryMap) for every queue whose
//      PFC bit is set and that does not have one yet.
template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::enableBigRedSwitchMode()
{
    SWSS_LOG_ENTER();

    // Bound by const reference so neither the port map nor its Port objects
    // are copied.
    const auto &allPorts = gPortsOrch->getAllPorts();

    sai_attribute_t attr;
    attr.id = SAI_PORT_ATTR_PRIORITY_FLOW_CONTROL;

    // Physical ports paired with their PFC mask; queried once and reused by
    // both passes below.
    vector<pair<const Port *, uint8_t>> phyPorts;
    for (const auto &it : allPorts)
    {
        const Port &port = it.second;

        if (port.m_type != Port::PHY)
        {
            SWSS_LOG_INFO("Skip non-phy port %s", port.m_alias.c_str());
            continue;
        }

        // use portorch api to get lossless tc in future after asym PFC is available.
        sai_status_t status = sai_port_api->get_port_attribute(port.m_port_id, 1, &attr);
        if (status != SAI_STATUS_SUCCESS)
        {
            // Nothing has been modified yet, so bailing out here leaves the
            // watchdog exactly as it was.
            SWSS_LOG_ERROR("Failed to get PFC mask on port %s: %d", port.m_alias.c_str(), status);
            return;
        }

        phyPorts.emplace_back(&port, attr.value.u8);
    }

    m_bigRedSwitchFlag = true;

    // Write to database that each queue enables BIG_RED_SWITCH
    vector<FieldValueTuple> countersFieldValues;
    countersFieldValues.emplace_back("BIG_RED_SWITCH_MODE", "enable");

    for (const auto &phy : phyPorts)
    {
        const Port &port = *phy.first;
        const uint8_t pfcMask = phy.second;

        for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++)
        {
            sai_object_id_t queueId = port.m_queue_ids[i];
            if ((pfcMask & (1 << i)) == 0 && m_entryMap.find(queueId) == m_entryMap.end())
            {
                continue;
            }

            PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()->set(
                sai_serialize_object_id(queueId),
                countersFieldValues);
        }
    }

    // Disable pfcwdaction handler on each queue if exists.
    for (auto &entry : m_entryMap)
    {
        if (entry.second.handler != nullptr)
        {
            entry.second.handler->commitCounters();
            entry.second.handler = nullptr;
        }
    }

    // Create pfcwdaction handler on all the ports.
    for (const auto &phy : phyPorts)
    {
        const Port &port = *phy.first;
        const uint8_t pfcMask = phy.second;

        for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++)
        {
            if ((pfcMask & (1 << i)) == 0)
            {
                continue;
            }

            sai_object_id_t queueId = port.m_queue_ids[i];

            // emplace() keeps an entry that already exists, so enabling the
            // mode twice does not replace a live handler.
            auto entry = m_brsEntryMap.emplace(
                queueId,
                PfcWdQueueEntry(PfcWdAction::PFC_WD_ACTION_DROP, port.m_port_id, i, port.m_alias)).first;

            if (entry->second.handler == nullptr)
            {
                SWSS_LOG_NOTICE(
                    "PFC Watchdog BIG_RED_SWITCH mode enabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
                    entry->second.portAlias.c_str(),
                    entry->second.index,
                    entry->first,
                    entry->second.portId);

                entry->second.handler = make_shared<DropHandler>(
                    entry->second.portId,
                    entry->first,
                    entry->second.index,
                    PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
                entry->second.handler->initCounters();
            }
        }
    }
}

template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
uint32_t detectionTime, uint32_t restorationTime, PfcWdAction action)
Expand Down Expand Up @@ -355,7 +523,7 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
}

// Create internal entry
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i));
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i, port.m_alias));

string key = getFlexCounterTableKey(queueIdStr);
m_flexCounterTable->set(key, queueFieldValues);
Expand Down Expand Up @@ -513,10 +681,11 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::~PfcWdSwOrch(void)

template <typename DropHandler, typename ForwardHandler>
PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdQueueEntry::PfcWdQueueEntry(
PfcWdAction action, sai_object_id_t port, uint8_t idx):
PfcWdAction action, sai_object_id_t port, uint8_t idx, string alias):
action(action),
portId(port),
index(idx)
index(idx),
portAlias(alias)
{
SWSS_LOG_ENTER();
}
Expand Down Expand Up @@ -564,12 +733,24 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
}

SWSS_LOG_NOTICE("Receive notification, %s", event.c_str());
if (event == "storm")

if (m_bigRedSwitchFlag)
{
SWSS_LOG_NOTICE("Big_RED_SWITCH mode is on, ingore syncd pfc watchdog notification");
}
else if (event == "storm")
{
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
{
if (entry->second.handler == nullptr)
{
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
entry->second.portAlias.c_str(),
entry->second.index,
entry->first,
entry->second.portId);

entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
entry->first,
Expand All @@ -582,6 +763,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
{
if (entry->second.handler == nullptr)
{
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
entry->second.portAlias.c_str(),
entry->second.index,
entry->first,
entry->second.portId);

entry->second.handler = make_shared<DropHandler>(
entry->second.portId,
entry->first,
Expand All @@ -594,6 +782,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
{
if (entry->second.handler == nullptr)
{
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
entry->second.portAlias.c_str(),
entry->second.index,
entry->first,
entry->second.portId);

entry->second.handler = make_shared<ForwardHandler>(
entry->second.portId,
entry->first,
Expand All @@ -604,13 +799,20 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
}
else
{
throw runtime_error("Unknown PFC WD action");
SWSS_LOG_ERROR("Unknown PFC WD action");
}
}
else if (event == "restore")
{
if (entry->second.handler != nullptr)
{
SWSS_LOG_NOTICE(
"PFC Watchdog storm restored on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.",
entry->second.portAlias.c_str(),
entry->second.index,
entry->first,
entry->second.portId);

entry->second.handler->commitCounters();
entry->second.handler = nullptr;
}
Expand Down
11 changes: 10 additions & 1 deletion orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
PfcWdQueueEntry(
PfcWdAction action,
sai_object_id_t port,
uint8_t idx);
uint8_t idx,
string alias);

PfcWdAction action = PfcWdAction::PFC_WD_ACTION_UNKNOWN;
sai_object_id_t portId = SAI_NULL_OBJECT_ID;
uint8_t index = 0;
string portAlias;
shared_ptr<PfcWdActionHandler> handler = { nullptr };
};

Expand All @@ -95,7 +97,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>

string filterPfcCounters(string counters, set<uint8_t>& losslessTc);
string getFlexCounterTableKey(string s);

void disableBigRedSwitchMode();
void enableBigRedSwitchMode();
void setBigRedSwitchMode(string value);

map<sai_object_id_t, PfcWdQueueEntry> m_entryMap;
map<sai_object_id_t, PfcWdQueueEntry> m_brsEntryMap;

const vector<sai_port_stat_t> c_portStatIds;
const vector<sai_queue_stat_t> c_queueStatIds;
Expand All @@ -105,6 +113,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
shared_ptr<ProducerTable> m_flexCounterTable = nullptr;
shared_ptr<ProducerTable> m_flexCounterGroupTable = nullptr;

bool m_bigRedSwitchFlag = false;
int m_pollInterval;
};

Expand Down

0 comments on commit 046628b

Please sign in to comment.