Skip to content

Commit

Permalink
[pfcwd]: Rx/Tx/Drop counters (sonic-net#363)
Browse files Browse the repository at this point in the history
* [pfcwd]: Rx/Tx/Drop counters

Keep track of Tx/Rx/Drop packets during watchdog action.
Record both accumulative values and counters from last event.

Signed-off-by: marian-pritsak <marianp@mellanox.com>

* Address comments

Signed-off-by: marian-pritsak <marianp@mellanox.com>

* Address comments

Signed-off-by: marian-pritsak <marianp@mellanox.com>
  • Loading branch information
marian-pritsak authored and lguohan committed Dec 20, 2017
1 parent 41caa74 commit a043a55
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 22 deletions.
153 changes: 137 additions & 16 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,15 @@
#define PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED "PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED"
#define PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED "PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED"

#define SAI_QUEUE_STAT_PACKETS_STR "SAI_QUEUE_STAT_PACKETS"
#define SAI_QUEUE_STAT_DROPPED_PACKETS_STR "SAI_QUEUE_STAT_DROPPED_PACKETS"
#define PFC_WD_QUEUE_STATS_TX_PACKETS "PFC_WD_QUEUE_STATS_TX_PACKETS"
#define PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS "PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS"
#define PFC_WD_QUEUE_STATS_RX_PACKETS "PFC_WD_QUEUE_STATS_RX_PACKETS"
#define PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS "PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS"

#define PFC_WD_QUEUE_STATS_TX_PACKETS_LAST "PFC_WD_QUEUE_STATS_TX_PACKETS_LAST"
#define PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST "PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST"
#define PFC_WD_QUEUE_STATS_RX_PACKETS_LAST "PFC_WD_QUEUE_STATS_RX_PACKETS_LAST"
#define PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST "PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST"

extern sai_object_id_t gSwitchId;
extern PortsOrch *gPortsOrch;
Expand All @@ -35,30 +42,60 @@ PfcWdActionHandler::PfcWdActionHandler(sai_object_id_t port, sai_object_id_t que
"PFC Watchdog detected PFC storm on queue 0x%lx port 0x%lx",
m_queue,
m_port);

m_stats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue));
m_stats.detectCount++;
m_stats.operational = false;

updateWdCounters(sai_serialize_object_id(m_queue), m_stats);
}

PfcWdActionHandler::~PfcWdActionHandler(void)
{
SWSS_LOG_ENTER();

auto finalStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue));

SWSS_LOG_NOTICE(
"Queue 0x%lx port 0x%lx restored from PFC storm. Tx packets: %lu. Dropped packets: %lu",
"Queue 0x%lx port 0x%lx restored from PFC storm.",
m_queue,
m_port,
finalStats.txPkt - m_stats.txPkt,
finalStats.txDropPkt - m_stats.txDropPkt);
m_port);
}

void PfcWdActionHandler::initCounters(void)
{
SWSS_LOG_ENTER();

if (!getHwCounters(m_hwStats))
{
return;
}

auto wdQueueStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue));
wdQueueStats.detectCount++;
wdQueueStats.operational = false;

updateWdCounters(sai_serialize_object_id(m_queue), wdQueueStats);
}

void PfcWdActionHandler::commitCounters(void)
{
SWSS_LOG_ENTER();

PfcWdHwStats hwStats;

if (!getHwCounters(hwStats))
{
return;
}

auto finalStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue));

finalStats.restoreCount++;
finalStats.operational = true;

finalStats.txPktLast = hwStats.txPkt - m_hwStats.txPkt;
finalStats.txDropPktLast = hwStats.txDropPkt - m_hwStats.txDropPkt;
finalStats.rxPktLast = hwStats.rxPkt - m_hwStats.rxPkt;
finalStats.rxDropPktLast = hwStats.rxDropPkt - m_hwStats.rxDropPkt;

finalStats.txPkt += finalStats.txPktLast;
finalStats.txDropPkt += finalStats.txDropPktLast;
finalStats.rxPkt += finalStats.rxPktLast;
finalStats.rxDropPkt += finalStats.rxDropPktLast;

updateWdCounters(sai_serialize_object_id(m_queue), finalStats);
}

Expand All @@ -67,6 +104,8 @@ PfcWdActionHandler::PfcWdQueueStats PfcWdActionHandler::getQueueStats(shared_ptr
SWSS_LOG_ENTER();

PfcWdQueueStats stats;
memset(&stats, 0, sizeof(PfcWdQueueStats));
stats.operational = true;
vector<FieldValueTuple> fieldValues;

if (!countersTable->get(queueIdStr, fieldValues))
Expand All @@ -91,14 +130,22 @@ PfcWdActionHandler::PfcWdQueueStats PfcWdActionHandler::getQueueStats(shared_ptr
{
stats.operational = value == PFC_WD_QUEUE_STATUS_OPERATIONAL ? true : false;
}
else if (field == SAI_QUEUE_STAT_PACKETS_STR)
else if (field == PFC_WD_QUEUE_STATS_TX_PACKETS)
{
stats.txPkt = stoul(value);
}
else if (field == SAI_QUEUE_STAT_DROPPED_PACKETS_STR)
else if (field == PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS)
{
stats.txDropPkt = stoul(value);
}
else if (field == PFC_WD_QUEUE_STATS_RX_PACKETS)
{
stats.rxPkt = stoul(value);
}
else if (field == PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS)
{
stats.rxDropPkt = stoul(value);
}
}

return move(stats);
Expand Down Expand Up @@ -127,6 +174,17 @@ void PfcWdActionHandler::updateWdCounters(const string& queueIdStr, const PfcWdQ

resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED, to_string(stats.detectCount));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED, to_string(stats.restoreCount));

resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_PACKETS, to_string(stats.txPkt));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS, to_string(stats.txDropPkt));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_PACKETS, to_string(stats.rxPkt));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS, to_string(stats.rxDropPkt));

resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_PACKETS_LAST, to_string(stats.txPktLast));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST, to_string(stats.txDropPktLast));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_PACKETS_LAST, to_string(stats.rxPktLast));
resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST, to_string(stats.rxDropPktLast));

resultFvValues.emplace_back(PFC_WD_QUEUE_STATUS, stats.operational ?
PFC_WD_QUEUE_STATUS_OPERATIONAL :
PFC_WD_QUEUE_STATUS_STORMED);
Expand Down Expand Up @@ -292,6 +350,69 @@ PfcWdLossyHandler::~PfcWdLossyHandler(void)
}
}

bool PfcWdLossyHandler::getHwCounters(PfcWdHwStats& counters)
{
SWSS_LOG_ENTER();

static const vector<sai_queue_stat_t> queueStatIds =
{
SAI_QUEUE_STAT_PACKETS,
SAI_QUEUE_STAT_DROPPED_PACKETS,
};

static const vector<sai_ingress_priority_group_stat_t> pgStatIds =
{
SAI_INGRESS_PRIORITY_GROUP_STAT_PACKETS,
SAI_INGRESS_PRIORITY_GROUP_STAT_DROPPED_PACKETS,
};

vector<uint64_t> queueStats;
queueStats.resize(queueStatIds.size());

sai_status_t status = sai_queue_api->get_queue_stats(
getQueue(),
static_cast<uint32_t>(queueStatIds.size()),
queueStatIds.data(),
queueStats.data());

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to fetch queue 0x%lx stats: %d", getQueue(), status);
return false;
}

// PG counters not yet supported in Mellanox platform
Port portInstance;
if (!gPortsOrch->getPort(getPort(), portInstance))
{
SWSS_LOG_ERROR("Cannot get port by ID 0x%lx", getPort());
return false;
}

sai_object_id_t pg = portInstance.m_priority_group_ids[getQueueId()];
vector<uint64_t> pgStats;
pgStats.resize(pgStatIds.size());

status = sai_buffer_api->get_ingress_priority_group_stats(
pg,
static_cast<uint32_t>(pgStatIds.size()),
pgStatIds.data(),
pgStats.data());

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to fetch pg 0x%lx stats: %d", pg, status);
return false;
}

counters.txPkt = queueStats[0];
counters.txDropPkt = queueStats[1];
counters.rxPkt = pgStats[0];
counters.rxDropPkt = pgStats[1];

return true;
}

PfcWdZeroBufferHandler::PfcWdZeroBufferHandler(sai_object_id_t port,
sai_object_id_t queue, uint8_t queueId, shared_ptr<Table> countersTable):
PfcWdLossyHandler(port, queue, queueId, countersTable)
Expand Down
36 changes: 30 additions & 6 deletions orchagent/pfcactionhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ extern "C" {
using namespace std;
using namespace swss;

struct PfcWdHwStats
{
uint64_t txPkt;
uint64_t txDropPkt;
uint64_t rxPkt;
uint64_t rxDropPkt;
};

// PFC queue interface class
// It resembles RAII behavior - pause storm is mitigated (queue is locked) on creation,
// and is restored (queue released) on removal
Expand All @@ -39,15 +47,30 @@ class PfcWdActionHandler
}

static void initWdCounters(shared_ptr<Table> countersTable, const string &queueIdStr);
void initCounters(void);
void commitCounters(void);

virtual bool getHwCounters(PfcWdHwStats& counters)
{
memset(&counters, 0, sizeof(PfcWdHwStats));

return true;
};

private:
struct PfcWdQueueStats
{
uint64_t detectCount = 0;
uint64_t restoreCount = 0;
uint64_t txPkt = 0;
uint64_t txDropPkt = 0;
bool operational = true;
uint64_t detectCount;
uint64_t restoreCount;
uint64_t txPkt;
uint64_t txDropPkt;
uint64_t rxPkt;
uint64_t rxDropPkt;
uint64_t txPktLast;
uint64_t txDropPktLast;
uint64_t rxPktLast;
uint64_t rxDropPktLast;
bool operational;
};

static PfcWdQueueStats getQueueStats(shared_ptr<Table> countersTable, const string &queueIdStr);
Expand All @@ -57,7 +80,7 @@ class PfcWdActionHandler
sai_object_id_t m_queue = SAI_NULL_OBJECT_ID;
uint8_t m_queueId = 0;
shared_ptr<Table> m_countersTable = nullptr;
PfcWdQueueStats m_stats;
PfcWdHwStats m_hwStats;
};

class PfcWdAclHandler: public PfcWdActionHandler
Expand Down Expand Up @@ -87,6 +110,7 @@ class PfcWdLossyHandler: public PfcWdActionHandler
PfcWdLossyHandler(sai_object_id_t port, sai_object_id_t queue,
uint8_t queueId, shared_ptr<Table> countersTable);
virtual ~PfcWdLossyHandler(void);
virtual bool getHwCounters(PfcWdHwStats& counters);
};

// PFC queue that implements drop action by draining queue with buffer of zero size
Expand Down
2 changes: 2 additions & 0 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,11 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
{
throw runtime_error("Unknown PFC WD action");
}
entry->second.handler->initCounters();
}
else if (event == "restore")
{
entry->second.handler->commitCounters();
entry->second.handler = nullptr;
}
else
Expand Down

0 comments on commit a043a55

Please sign in to comment.