Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cisco-8000 pfc-wd support #1748

Merged
merged 7 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ dist_swss_DATA = \
pfc_detect_broadcom.lua \
pfc_detect_barefoot.lua \
pfc_detect_nephos.lua \
pfc_detect_cisco-8000.lua \
pfc_restore.lua \
pfc_restore_cisco-8000.lua \
port_rates.lua \
watermark_queue.lua \
watermark_pg.lua \
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const char state_db_key_delimiter = '|';
#define VS_PLATFORM_SUBSTRING "vs"
#define NPS_PLATFORM_SUBSTRING "nephos"
#define MRVL_PLATFORM_SUBSTRING "marvell"
#define CISCO_8000_PLATFORM_SUBSTRING "cisco-8000"

#define CONFIGDB_KEY_SEPARATOR "|"
#define DEFAULT_KEY_SEPARATOR ":"
Expand Down
21 changes: 21 additions & 0 deletions orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,27 @@ bool OrchDaemon::init()
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
} else if (platform == CISCO_8000_PLATFORM_SUBSTRING)
{
static const vector<sai_port_stat_t> portStatIds;

static const vector<sai_queue_stat_t> queueStatIds =
{
SAI_QUEUE_STAT_PACKETS,
};

static const vector<sai_queue_attr_t> queueAttrIds =
{
SAI_QUEUE_ATTR_PAUSE_STATUS,
};

m_orchList.push_back(new PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>(
m_configDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
}

m_orchList.push_back(&CounterCheckOrch::getInstance(m_configDb));
Expand Down
76 changes: 76 additions & 0 deletions orchagent/pfc_detect_cisco-8000.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- PFC watchdog storm-detection plugin for cisco-8000.
--
-- KEYS - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval (milliseconds)
--
-- Results are delivered by PUBLISHing '["<queue id>","storm"]' or
-- '["<queue id>","restore"]' on the PFC_WD_ACTION channel; the table returned
-- by this script is always empty.

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
-- Scaled by 1000 because it is compared against PFC_WD_DETECTION_TIME below
-- (presumably that field is stored in microseconds -- confirm with its writer).
local poll_time = tonumber(ARGV[3]) * 1000

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue
for i = #KEYS, 1, -1 do
    -- Hash that holds both the polled counters and the watchdog state of this queue
    local queue_key = counters_table_name .. ':' .. KEYS[i]

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Queues carrying BIG_RED_SWITCH_MODE are skipped. A queue is examined
    -- while it is operational, or regardless of status when its action is 'alert'.
    if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
        local detection_time = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME')
        if detection_time then
            detection_time = tonumber(detection_time)

            -- First run for this queue: start the countdown from the full detection time
            local time_left = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT')
            if not time_left then
                time_left = detection_time
            else
                time_left = tonumber(time_left)
            end

            -- Get PFC status
            local packets = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS')
            local queue_pause_status = redis.call('HGET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS')

            -- Nothing is evaluated or saved until both values have been polled at least once
            if packets and queue_pause_status then

                -- DEBUG hook: a DEBUG_STORM field set to "enabled" on the queue
                -- forces the storm condition below.
                local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

                -- Check actual condition of queue being in PFC storm
                if (queue_pause_status == 'true') or (debug_storm == "enabled") then
                    if time_left <= poll_time then
                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
                        time_left = detection_time
                    else
                        time_left = time_left - poll_time
                    end
                else
                    -- With the 'alert' action the restore is signalled from here
                    -- as soon as a non-operational queue is no longer paused.
                    if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
                    end
                    time_left = detection_time
                end

                -- Save values for next run
                redis.call('HSET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT', time_left)
                redis.call('HSET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
                redis.call('HSET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last', packets)
            end
        end
    end
end

return rets
62 changes: 62 additions & 0 deletions orchagent/pfc_restore_cisco-8000.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- PFC watchdog restore plugin for cisco-8000.
--
-- KEYS - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval (milliseconds)
--
-- Results are delivered by PUBLISHing '["<queue id>","restore"]' on the
-- PFC_WD_ACTION channel; the table returned by this script is always empty.

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
-- Scaled by 1000 because it is subtracted from PFC_WD_RESTORATION_TIME below
-- (presumably that field is stored in microseconds -- confirm with its writer).
local poll_time = tonumber(ARGV[3]) * 1000

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue
for i = #KEYS, 1, -1 do
    -- Hash that holds both the polled counters and the watchdog state of this queue
    local queue_key = counters_table_name .. ':' .. KEYS[i]

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local restoration_time = redis.call('HGET', queue_key, 'PFC_WD_RESTORATION_TIME')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Only non-operational queues with a configured restoration time are
    -- handled here; BIG_RED_SWITCH_MODE queues and the 'alert' action are excluded.
    if not big_red_switch_mode and pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
        restoration_time = tonumber(restoration_time)

        -- First run for this queue: start the countdown from the full restoration time
        local time_left = redis.call('HGET', queue_key, 'PFC_WD_RESTORATION_TIME_LEFT')
        if not time_left then
            time_left = restoration_time
        else
            time_left = tonumber(time_left)
        end

        -- DEBUG hook: a DEBUG_STORM field set to "enabled" on the queue keeps
        -- the queue from being considered restored.
        local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

        -- Check actual condition of queue being restored from PFC storm
        local queue_pause_status = redis.call('HGET', queue_key, 'SAI_QUEUE_ATTR_PAUSE_STATUS')

        if (queue_pause_status == 'false') and (debug_storm ~= "enabled") then
            -- NOTE(review): the countdown must reach 0 (not poll_time) before
            -- the restore is published -- confirm this threshold is intended.
            if time_left <= 0 then
                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
                time_left = restoration_time
            else
                time_left = time_left - poll_time
            end
        else
            -- Still paused (or debug storm active): restart the countdown
            time_left = restoration_time
        end

        -- Save values for next run
        redis.call('HSET', queue_key, 'PFC_WD_RESTORATION_TIME_LEFT', time_left)
    end
end

return rets
76 changes: 76 additions & 0 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,82 @@ void PfcWdActionHandler::updateWdCounters(const string& queueIdStr, const PfcWdQ
m_countersTable->set(queueIdStr, resultFvValues);
}

// Constructing the handler starts PFC deadlock recovery on the queue by
// setting SAI_QUEUE_ATTR_PFC_DLR_INIT to true. A failing SAI call is only
// logged; construction still completes.
PfcWdSaiDlrInitHandler::PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
        uint8_t queueId, shared_ptr<Table> countersTable):
    PfcWdActionHandler(port, queue, queueId, countersTable)
{
    SWSS_LOG_ENTER();

    sai_attribute_t dlrInit;
    dlrInit.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInit.value.booldata = true;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queue, &dlrInit);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to set PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                   " queueId %d : %d",
                   port, queue, queueId, rc);
}

// Destroying the handler stops PFC deadlock recovery on the queue by
// setting SAI_QUEUE_ATTR_PFC_DLR_INIT back to false. A failing SAI call is
// only logged.
PfcWdSaiDlrInitHandler::~PfcWdSaiDlrInitHandler(void)
{
    SWSS_LOG_ENTER();

    const sai_object_id_t portOid = getPort();
    const sai_object_id_t queueOid = getQueue();
    const uint8_t queueIndex = getQueueId();

    sai_attribute_t dlrInit;
    dlrInit.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInit.value.booldata = false;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queueOid, &dlrInit);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to clear PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                   " queueId %d : %d", portOid, queueOid, queueIndex, rc);
}

// Reads the queue's packet and dropped-packet statistics through SAI and
// stores them as the tx counters; the rx counters are reported as zero.
// Returns false (after logging) when the SAI query fails, true otherwise.
bool PfcWdSaiDlrInitHandler::getHwCounters(PfcWdHwStats& counters)
{
    SWSS_LOG_ENTER();

    // Order matters: the results are read back by position below.
    static const vector<sai_stat_id_t> statIds =
    {
        SAI_QUEUE_STAT_PACKETS,
        SAI_QUEUE_STAT_DROPPED_PACKETS,
    };

    vector<uint64_t> statValues(statIds.size());

    const sai_status_t rc = sai_queue_api->get_queue_stats(
            getQueue(),
            static_cast<uint32_t>(statIds.size()),
            statIds.data(),
            statValues.data());
    if (rc != SAI_STATUS_SUCCESS)
    {
        SWSS_LOG_ERROR("Failed to fetch queue 0x%" PRIx64 " stats: %d", getQueue(), rc);
        return false;
    }

    counters.rxPkt = 0;
    counters.rxDropPkt = 0;
    counters.txPkt = statValues[0];
    counters.txDropPkt = statValues[1];

    return true;
}

PfcWdAclHandler::PfcWdAclHandler(sai_object_id_t port, sai_object_id_t queue,
uint8_t queueId, shared_ptr<Table> countersTable):
PfcWdLossyHandler(port, queue, queueId, countersTable)
Expand Down
11 changes: 11 additions & 0 deletions orchagent/pfcactionhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,15 @@ class PfcWdZeroBufferHandler: public PfcWdLossyHandler
sai_object_id_t m_originalPgBufferProfile = SAI_NULL_OBJECT_ID;
};

// PFC watchdog handler whose drop action is carried out by the SAI
// implementation: the queue is drained through the queue attribute
// SAI_QUEUE_ATTR_PFC_DLR_INIT.
class PfcWdSaiDlrInitHandler: public PfcWdActionHandler
{
    public:
        PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
                uint8_t queueId, shared_ptr<Table> countersTable);
        virtual ~PfcWdSaiDlrInitHandler();

        // Fills `counters` from the queue statistics; returns false on failure.
        virtual bool getHwCounters(PfcWdHwStats& counters);
};

#endif
29 changes: 19 additions & 10 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,15 @@ template <typename DropHandler, typename ForwardHandler>
// Opens the COUNTERS_DB connection and its counters table, and records the
// platform name taken from the "platform" environment variable (empty string
// when the variable is unset, in which case an error is logged).
PfcWdOrch<DropHandler, ForwardHandler>::PfcWdOrch(DBConnector *db, vector<string> &tableNames):
    Orch(db, tableNames),
    m_countersDb(new DBConnector("COUNTERS_DB", 0)),
    m_countersTable(new Table(m_countersDb.get(), COUNTERS_TABLE)),
    m_platform(getenv("platform") ? getenv("platform") : "")
{
    SWSS_LOG_ENTER();

    if (m_platform.empty())
    {
        SWSS_LOG_ERROR("Platform environment variable is not defined");
    }
}


Expand Down Expand Up @@ -219,6 +225,10 @@ task_process_status PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const st
SWSS_LOG_ERROR("Invalid PFC Watchdog action %s", value.c_str());
return task_process_status::task_invalid_entry;
}
if ((m_platform == CISCO_8000_PLATFORM_SUBSTRING) && (action == PfcWdAction::PFC_WD_ACTION_FORWARD)) {
neethajohn marked this conversation as resolved.
Show resolved Hide resolved
SWSS_LOG_ERROR("Unsupported action %s for platform %s", value.c_str(), m_platform.c_str());
return task_process_status::task_invalid_entry;
}
}
else
{
Expand Down Expand Up @@ -657,16 +667,14 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(
{
SWSS_LOG_ENTER();

string platform = getenv("platform") ? getenv("platform") : "";
if (platform == "")
{
SWSS_LOG_ERROR("Platform environment variable is not defined");
return;
}

string detectSha, restoreSha;
string detectPluginName = "pfc_detect_" + platform + ".lua";
string restorePluginName = "pfc_restore.lua";
string detectPluginName = "pfc_detect_" + this->m_platform + ".lua";
string restorePluginName;
if (this->m_platform == CISCO_8000_PLATFORM_SUBSTRING) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this cover both the fixed and chassis sub-systems?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for now. If anything changes in the future, we will update it here.

restorePluginName = "pfc_restore_" + this->m_platform + ".lua";
} else {
restorePluginName = "pfc_restore.lua";
}

try
{
Expand Down Expand Up @@ -1056,3 +1064,4 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
// Trick to keep member functions in a separate file:
// explicitly instantiate PfcWdSwOrch for each DropHandler/ForwardHandler
// pairing listed here. The PfcWdSaiDlrInitHandler/PfcWdActionHandler pairing
// is the one created for the cisco-8000 platform.
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>;
1 change: 1 addition & 0 deletions orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class PfcWdOrch: public Orch

protected:
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
string m_platform = "";

private:

Expand Down