Skip to content

Commit

Permalink
[pfcwdorch]: Add alert mode broadcom support (sonic-net#343)
Browse files Browse the repository at this point in the history
Signed-off-by: Sihui Han <sihan@microsoft.com>
  • Loading branch information
sihuihan88 authored and Shuotian Cheng committed Oct 13, 2017
1 parent 3f8cfe5 commit e07d86b
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 21 deletions.
3 changes: 2 additions & 1 deletion orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ swssdir = $(datadir)/swss

dist_swss_DATA = \
pfc_detect_mellanox.lua \
pfc_restore_mellanox.lua
pfc_detect_broadcom.lua \
pfc_restore.lua

bin_PROGRAMS = orchagent routeresync

Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const char comma = ',';
const char range_specifier = '-';

#define MLNX_PLATFORM_SUBSTRING "mellanox"
#define BRCM_PLATFORM_SUBSTRING "broadcom"

typedef enum
{
Expand Down
45 changes: 44 additions & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,54 @@ bool OrchDaemon::init()
SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
};

static const vector<sai_queue_attr_t> queueAttrIds;

m_orchList.push_back(new PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>(
m_applDb,
pfc_wd_tables,
portStatIds,
queueStatIds));
queueStatIds,
queueAttrIds));
}
else if (platform == BRCM_PLATFORM_SUBSTRING)
{
static const vector<sai_port_stat_t> portStatIds =
{
SAI_PORT_STAT_PFC_0_RX_PKTS,
SAI_PORT_STAT_PFC_1_RX_PKTS,
SAI_PORT_STAT_PFC_2_RX_PKTS,
SAI_PORT_STAT_PFC_3_RX_PKTS,
SAI_PORT_STAT_PFC_4_RX_PKTS,
SAI_PORT_STAT_PFC_5_RX_PKTS,
SAI_PORT_STAT_PFC_6_RX_PKTS,
SAI_PORT_STAT_PFC_7_RX_PKTS,
SAI_PORT_STAT_PFC_0_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_1_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_2_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_3_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_4_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_5_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_6_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_7_ON2OFF_RX_PKTS,
};

static const vector<sai_queue_stat_t> queueStatIds =
{
SAI_QUEUE_STAT_PACKETS,
SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
};

static const vector<sai_queue_attr_t> queueAttrIds =
{
SAI_QUEUE_ATTR_PAUSE_STATUS,
};

m_orchList.push_back(new PfcWdSwOrch<PfcWdActionHandler, PfcWdActionHandler>(
m_applDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds));
}

return true;
Expand Down
91 changes: 91 additions & 0 deletions orchagent/pfc_detect_broadcom.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
-- KEYS - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
-- return queue Ids that satisfy criteria

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue
local n = table.getn(KEYS)
for i = n, 1, -1 do
local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
local counter_num = 0
local old_counter_num = 0
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
if not time_left then
time_left = detection_time
else
time_left = tonumber(time_left)
end

local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS'


-- Get all counters
local occupancy_bytes = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES'))
local packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS'))
local pfc_rx_packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key))
local pfc_on2off = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key))
local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')

local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last')
local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last')

-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.

-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_on2off_last = tonumber(pfc_on2off_last)

-- Check actual condition of queue being in PFC storm
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
-- DEBUG CODE START. Uncomment to enable
(debug_storm == "enabled") or
-- DEBUG CODE END.
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then
if time_left <= poll_time then
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
else
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
end

-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off)
end
end

return rets
6 changes: 5 additions & 1 deletion orchagent/pfc_detect_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ for i = n, 1, -1 do
local old_counter_num = 0
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
if pfc_wd_status == 'operational' then
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
if not time_left then
Expand Down Expand Up @@ -67,6 +68,9 @@ for i = n, 1, -1 do
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ for i = n, 1, -1 do
local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
local pfc_rx_pkt_key = ''
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
if pfc_wd_status ~= 'operational' then
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' then
local restoration_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT')
if time_left == nil then
Expand Down
63 changes: 49 additions & 14 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,27 @@ PfcWdAction PfcWdOrch<DropHandler, ForwardHandler>::deserializeAction(const stri
return actionMap.at(key);
}

template <typename DropHandler, typename ForwardHandler>
string PfcWdOrch<DropHandler, ForwardHandler>::serializeAction(const PfcWdAction& action)
{
SWSS_LOG_ENTER();

const map<PfcWdAction, string> actionMap =
{
{ PfcWdAction::PFC_WD_ACTION_FORWARD, "forward" },
{ PfcWdAction::PFC_WD_ACTION_DROP, "drop" },
{ PfcWdAction::PFC_WD_ACTION_ALERT, "alert" },
};

if (actionMap.find(action) == actionMap.end())
{
return "unknown";
}

return actionMap.at(action);
}


template <typename DropHandler, typename ForwardHandler>
void PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
const vector<FieldValueTuple>& data)
Expand Down Expand Up @@ -270,6 +291,8 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
vector<FieldValueTuple> countersFieldValues;
countersFieldValues.emplace_back("PFC_WD_DETECTION_TIME", to_string(detectionTime * 1000));
countersFieldValues.emplace_back("PFC_WD_RESTORATION_TIME", to_string(restorationTime * 1000));
countersFieldValues.emplace_back("PFC_WD_ACTION", this->serializeAction(action));

PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()->set(queueIdStr, countersFieldValues);

// We register our queues in PFC_WD table so that syncd will know that it must poll them
Expand All @@ -281,6 +304,12 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
queueFieldValues.emplace_back(PFC_WD_QUEUE_COUNTER_ID_LIST, str);
}

if (!c_queueAttrIds.empty())
{
string str = counterIdsToStr(c_queueAttrIds, sai_serialize_queue_attr);
queueFieldValues.emplace_back(PFC_WD_QUEUE_ATTR_ID_LIST, str);
}

// Create internal entry
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i));

Expand Down Expand Up @@ -313,18 +342,19 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(
DBConnector *db,
vector<string> &tableNames,
vector<sai_port_stat_t> portStatIds,
vector<sai_queue_stat_t> queueStatIds):
vector<sai_queue_stat_t> queueStatIds,
vector<sai_queue_attr_t> queueAttrIds):
PfcWdOrch<DropHandler,
ForwardHandler>(db, tableNames),
m_pfcWdDb(new DBConnector(PFC_WD_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
m_pfcWdTable(new ProducerStateTable(m_pfcWdDb.get(), PFC_WD_STATE_TABLE)),
c_portStatIds(portStatIds),
c_queueStatIds(queueStatIds)
c_queueStatIds(queueStatIds),
c_queueAttrIds(queueAttrIds)
{
SWSS_LOG_ENTER();

string platform = getenv("platform") ? getenv("platform") : "";

if (platform == "")
{
SWSS_LOG_ERROR("Platform environment variable is not defined");
Expand All @@ -333,7 +363,7 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(

string detectSha, restoreSha;
string detectPluginName = "pfc_detect_" + platform + ".lua";
string restorePluginName = "pfc_restore_" + platform + ".lua";
string restorePluginName = "pfc_restore.lua";

try
{
Expand Down Expand Up @@ -428,9 +458,21 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::handleWdNotification(swss::Notifi
return;
}

SWSS_LOG_NOTICE("Receive notification, %s", event.c_str());
if (event == "storm")
{
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_DROP)
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
{
if (entry->second.handler == nullptr)
{
entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
entry->first,
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_DROP)
{
entry->second.handler = make_shared<DropHandler>(
entry->second.portId,
Expand All @@ -446,14 +488,6 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::handleWdNotification(swss::Notifi
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
{
entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
entry->first,
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
else
{
throw runtime_error("Unknown PFC WD action");
Expand All @@ -465,7 +499,7 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::handleWdNotification(swss::Notifi
}
else
{
SWSS_LOG_ERROR("Received unknown event from plugin");
SWSS_LOG_ERROR("Received unknown event from plugin, %s", event.c_str());
}
}

Expand Down Expand Up @@ -551,3 +585,4 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::endWatchdogThread(void)

// Trick to keep member functions in a separate file
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdActionHandler, PfcWdActionHandler>;
9 changes: 6 additions & 3 deletions orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ class PfcWdOrch: public Orch
return m_countersDb;
}

private:
static PfcWdAction deserializeAction(const string& key);
void createEntry(const string& key, const vector<FieldValueTuple>& data);
static string serializeAction(const PfcWdAction &action);
private:
void createEntry(const string& key, const vector<FieldValueTuple>& data);
void deleteEntry(const string& name);

shared_ptr<DBConnector> m_countersDb = nullptr;
Expand All @@ -60,7 +61,8 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
DBConnector *db,
vector<string> &tableNames,
vector<sai_port_stat_t> portStatIds,
vector<sai_queue_stat_t> queueStatIds);
vector<sai_queue_stat_t> queueStatIds,
vector<sai_queue_attr_t> queueAttrIds);
virtual ~PfcWdSwOrch(void);

virtual bool startWdOnPort(const Port& port,
Expand Down Expand Up @@ -97,6 +99,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>

const vector<sai_port_stat_t> c_portStatIds;
const vector<sai_queue_stat_t> c_queueStatIds;
const vector<sai_queue_attr_t> c_queueAttrIds;

shared_ptr<DBConnector> m_pfcWdDb = nullptr;
shared_ptr<ProducerStateTable> m_pfcWdTable = nullptr;
Expand Down

0 comments on commit e07d86b

Please sign in to comment.