Skip to content

Commit

Permalink
Add buffer pool watermark support (sonic-net#853)
Browse files Browse the repository at this point in the history
* Write fields to FLEX_COUNTER_GROUP_TABLE at the construction of
a BufferOrch object:

"POLL_INTERVAL"
"BUFFER_POLL_PLUGIN_LIST"
"STATS_MODE"

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Update buffer pool name to oid mapping in COUNTERS_DB upon the set and del of its oid

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Push buffer pool watermark COUNTER_ID_LIST to FLEX_COUNTER_TABLE

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Implement user clear logic to buffer pool watermark

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Add periodic clear to buffer pool watermark

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Add lua script for watermark_bufferpool

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Fix syntax error in buffer pool watermark lua script

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Fix compile error in watermarkorch.cpp

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Fix from dut verification

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Add 6000 to read only polling mode

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Touch-up to existing codes

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Remove debugging symbols

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Address comments

Signed-off-by: Wenda Ni <wenni@microsoft.com>

* Address comments

Signed-off-by: Wenda Ni <wenni@microsoft.com>
  • Loading branch information
wendani authored Jun 14, 2019
1 parent 4a67378 commit 4577b40
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 16 deletions.
3 changes: 2 additions & 1 deletion orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ dist_swss_DATA = \
pfc_detect_nephos.lua \
pfc_restore.lua \
watermark_queue.lua \
watermark_pg.lua
watermark_pg.lua \
watermark_bufferpool.lua

bin_PROGRAMS = orchagent routeresync orchagent_restart_check

Expand Down
101 changes: 97 additions & 4 deletions orchagent/bufferorch.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include "tokenize.h"

#include "bufferorch.h"
#include "logger.h"
#include "sai_serialize.h"

#include <sstream>
#include <iostream>

using namespace std;

extern sai_port_api_t *sai_port_api;
extern sai_queue_api_t *sai_queue_api;
extern sai_switch_api_t *sai_switch_api;
Expand All @@ -14,7 +16,13 @@ extern sai_buffer_api_t *sai_buffer_api;
extern PortsOrch *gPortsOrch;
extern sai_object_id_t gSwitchId;

using namespace std;
#define BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS "10000"


static const vector<sai_buffer_pool_stat_t> bufferPoolWatermarkStatIds =
{
SAI_BUFFER_POOL_STAT_WATERMARK_BYTES,
};

type_map BufferOrch::m_buffer_type_maps = {
{CFG_BUFFER_POOL_TABLE_NAME, new object_map()},
Expand All @@ -25,11 +33,18 @@ type_map BufferOrch::m_buffer_type_maps = {
{CFG_BUFFER_PORT_EGRESS_PROFILE_LIST_NAME, new object_map()}
};

BufferOrch::BufferOrch(DBConnector *db, vector<string> &tableNames) : Orch(db, tableNames)
BufferOrch::BufferOrch(DBConnector *db, vector<string> &tableNames) :
Orch(db, tableNames),
m_flexCounterDb(new DBConnector(FLEX_COUNTER_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
m_flexCounterTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_TABLE)),
m_flexCounterGroupTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_GROUP_TABLE)),
m_countersDb(new DBConnector(COUNTERS_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
m_countersDbRedisClient(m_countersDb.get())
{
SWSS_LOG_ENTER();
initTableHandlers();
initBufferReadyLists(db);
initFlexCounterGroupTable();
};

void BufferOrch::initTableHandlers()
Expand Down Expand Up @@ -82,6 +97,32 @@ void BufferOrch::initBufferReadyList(Table& table)
}
}

void BufferOrch::initFlexCounterGroupTable(void)
{
string bufferPoolWmPluginName = "watermark_bufferpool.lua";

try
{
string bufferPoolLuaScript = swss::loadLuaScript(bufferPoolWmPluginName);
string bufferPoolWmSha = swss::loadRedisScript(m_countersDb.get(), bufferPoolLuaScript);

vector<FieldValueTuple> fvTuples;
fvTuples.emplace_back(BUFFER_POOL_PLUGIN_FIELD, bufferPoolWmSha);
fvTuples.emplace_back(POLL_INTERVAL_FIELD, BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS);

// TODO (work in progress):
// Some platforms do not support buffer pool watermark clear operation on a particular pool
// Invoke the SAI clear_stats API per pool to query the capability from the API call return status
fvTuples.emplace_back(STATS_MODE_FIELD, STATS_MODE_READ_AND_CLEAR);

m_flexCounterGroupTable->set(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP, fvTuples);
}
catch (const runtime_error &e)
{
SWSS_LOG_ERROR("Buffer pool watermark lua script and/or flex counter group not set successfully. Runtime error: %s", e.what());
}
}

bool BufferOrch::isPortReady(const std::string& port_name) const
{
SWSS_LOG_ENTER();
Expand All @@ -105,6 +146,51 @@ bool BufferOrch::isPortReady(const std::string& port_name) const
return result;
}

void BufferOrch::generateBufferPoolWatermarkCounterIdList(void)
{
// This function will be called in FlexCounterOrch when field:value tuple "FLEX_COUNTER_STATUS":"enable"
// is received on buffer pool watermark key under table "FLEX_COUNTER_GROUP_TABLE"
// Because the SubscriberStateTable listens to the entire keyspace of "BUFFER_POOL_WATERMARK", any update
// to field value tuples under key "BUFFER_POOL_WATERMARK" will cause this tuple to be heard again
// To avoid resync the coutner ID list a second time, we introduce a data member variable to mark whether
// this operation has already been done or not yet
if (m_isBufferPoolWatermarkCounterIdListGenerated)
{
return;
}

// Detokenize the SAI watermark stats to a string, separated by comma
string statList;
for (const auto &it : bufferPoolWatermarkStatIds)
{
statList += (sai_serialize_buffer_pool_stat(it) + list_item_delimiter);
}
if (!statList.empty())
{
statList.pop_back();
}

vector<FieldValueTuple> fvTuples;
fvTuples.emplace_back(BUFFER_POOL_COUNTER_ID_LIST, statList);

// Push buffer pool watermark COUNTER_ID_LIST to FLEX_COUNTER_TABLE on a per buffer pool basis
for (const auto &it : *(m_buffer_type_maps[CFG_BUFFER_POOL_TABLE_NAME]))
{
string key = BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP ":" + sai_serialize_object_id(it.second);
m_flexCounterTable->set(key, fvTuples);
}

m_isBufferPoolWatermarkCounterIdListGenerated = true;
}

const object_map &BufferOrch::getBufferPoolNameOidMap(void)
{
// In the case different Orches are running in
// different threads, caller may need to grab a read lock
// before calling this function
return *m_buffer_type_maps[CFG_BUFFER_POOL_TABLE_NAME];
}

task_process_status BufferOrch::processBufferPool(Consumer &consumer)
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -209,6 +295,12 @@ task_process_status BufferOrch::processBufferPool(Consumer &consumer)
}
(*(m_buffer_type_maps[map_type_name]))[object_name] = sai_object;
SWSS_LOG_NOTICE("Created buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
// Here we take the PFC watchdog approach to update the COUNTERS_DB metadata (e.g., PFC_WD_DETECTION_TIME per queue)
// at initialization (creation and registration phase)
// Specifically, we push the buffer pool name to oid mapping upon the creation of the oid
// In pg and queue case, this mapping installment is deferred to FlexCounterOrch at a reception of field
// "FLEX_COUNTER_STATUS"
m_countersDbRedisClient.hset(COUNTERS_BUFFER_POOL_NAME_MAP, object_name, sai_serialize_object_id(sai_object));
}
}
else if (op == DEL_COMMAND)
Expand All @@ -222,6 +314,7 @@ task_process_status BufferOrch::processBufferPool(Consumer &consumer)
SWSS_LOG_NOTICE("Removed buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
auto it_to_delete = (m_buffer_type_maps[map_type_name])->find(object_name);
(m_buffer_type_maps[map_type_name])->erase(it_to_delete);
m_countersDbRedisClient.hdel(COUNTERS_BUFFER_POOL_NAME_MAP, object_name);
}
else
{
Expand Down Expand Up @@ -370,7 +463,7 @@ task_process_status BufferOrch::processBufferProfile(Consumer &consumer)
}

/*
Input sample "BUFFER_QUEUE_TABLE:Ethernet4,Ethernet45:10-15"
Input sample "BUFFER_QUEUE|Ethernet4,Ethernet45|10-15"
*/
task_process_status BufferOrch::processQueue(Consumer &consumer)
{
Expand Down
19 changes: 18 additions & 1 deletion orchagent/bufferorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include <unordered_map>
#include "orch.h"
#include "portsorch.h"
#include "redisapi.h"
#include "redisclient.h"

#define BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP "BUFFER_POOL_WATERMARK_STAT_COUNTER"

const string buffer_size_field_name = "size";
const string buffer_pool_type_field_name = "type";
Expand All @@ -30,16 +34,20 @@ class BufferOrch : public Orch
BufferOrch(DBConnector *db, vector<string> &tableNames);
bool isPortReady(const std::string& port_name) const;
static type_map m_buffer_type_maps;
void generateBufferPoolWatermarkCounterIdList(void);
const object_map &getBufferPoolNameOidMap(void);

private:
typedef task_process_status (BufferOrch::*buffer_table_handler)(Consumer& consumer);
typedef map<string, buffer_table_handler> buffer_table_handler_map;
typedef pair<string, buffer_table_handler> buffer_handler_pair;

virtual void doTask() override;
void doTask() override;
virtual void doTask(Consumer& consumer);
void initTableHandlers();
void initBufferReadyLists(DBConnector *db);
void initBufferReadyList(Table& table);
void initFlexCounterGroupTable(void);
task_process_status processBufferPool(Consumer &consumer);
task_process_status processBufferProfile(Consumer &consumer);
task_process_status processQueue(Consumer &consumer);
Expand All @@ -50,6 +58,15 @@ class BufferOrch : public Orch
buffer_table_handler_map m_bufferHandlerMap;
std::unordered_map<std::string, bool> m_ready_list;
std::unordered_map<std::string, std::vector<std::string>> m_port_ready_list_ref;

unique_ptr<DBConnector> m_flexCounterDb;
unique_ptr<ProducerTable> m_flexCounterGroupTable;
unique_ptr<ProducerTable> m_flexCounterTable;

unique_ptr<DBConnector> m_countersDb;
RedisClient m_countersDbRedisClient;

bool m_isBufferPoolWatermarkCounterIdListGenerated = false;
};
#endif /* SWSS_BUFFORCH_H */

26 changes: 24 additions & 2 deletions orchagent/flexcounterorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
#include "redisclient.h"
#include "sai_serialize.h"
#include "pfcwdorch.h"
#include "bufferorch.h"

extern sai_port_api_t *sai_port_api;

extern PortsOrch *gPortsOrch;
extern IntfsOrch *gIntfsOrch;
extern BufferOrch *gBufferOrch;

#define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK"

unordered_map<string, string> flexCounterGroupMap =
{
Expand All @@ -19,6 +23,7 @@ unordered_map<string, string> flexCounterGroupMap =
{"PFCWD", PFC_WD_FLEX_COUNTER_GROUP},
{"QUEUE_WATERMARK", QUEUE_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
{"PG_WATERMARK", PG_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
{BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
{"RIF", RIF_STAT_COUNTER_FLEX_COUNTER_GROUP},
};

Expand Down Expand Up @@ -76,11 +81,28 @@ void FlexCounterOrch::doTask(Consumer &consumer)
}
else if(field == FLEX_COUNTER_STATUS_FIELD)
{
// Currently the counters are disabled by default
// The queue maps will be generated as soon as counters are enabled
// Currently, the counters are disabled for polling by default
// The queue maps will be generated as soon as counters are enabled for polling
// Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains
// the list of SAI stats/attributes of polling interest, to the FLEX_COUNTER_DB under the
// additional condition that the polling interval at that time is set nonzero positive,
// which is automatically satisfied upon the creation of the orch object that requires
// the syncd flex counter polling service
// This postponement is introduced by design to accelerate the initialization process
//
// generateQueueMap() is called as long as a field "FLEX_COUNTER_STATUS" event is heard,
// regardless of whether the key is "QUEUE" or whether the value is "enable" or "disable"
// This can be because generateQueueMap() installs a fundamental list of queue stats
// that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked
// into the same function as they may not be counted as fundamental
gPortsOrch->generateQueueMap();
gPortsOrch->generatePriorityGroupMap();
gIntfsOrch->generateInterfaceMap();
// Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event
if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
{
gBufferOrch->generateBufferPoolWatermarkCounterIdList();
}

vector<FieldValueTuple> fieldValues;
fieldValues.emplace_back(FLEX_COUNTER_STATUS_FIELD, value);
Expand Down
63 changes: 63 additions & 0 deletions orchagent/watermark_bufferpool.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- KEYS - buffer IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
-- return nothing for now

local counters_db = ARGV[1]
local counters_table_name = 'COUNTERS'

local user_table_name = 'USER_WATERMARKS'
local persistent_table_name = 'PERSISTENT_WATERMARKS'
local periodic_table_name = 'PERIODIC_WATERMARKS'

local sai_buffer_pool_watermark_stat_name = 'SAI_BUFFER_POOL_STAT_WATERMARK_BYTES'

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each buffer pool oid
local n = table.getn(KEYS)
for i = n, 1, -1 do
-- Get new watermark value from COUNTERS
local wm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
if wm then
wm = tonumber(wm)

-- Get last value from *_WATERMARKS
local user_wm_last = redis.call('HGET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)

-- Set higher value to *_WATERMARKS
if user_wm_last then
user_wm_last = tonumber(user_wm_last)
if wm > user_wm_last then
redis.call('HSET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end
else
redis.call('HSET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end

local persistent_wm_last = redis.call('HGET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
if persistent_wm_last then
persistent_wm_last = tonumber(persistent_wm_last)
if wm > persistent_wm_last then
redis.call('HSET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end
else
redis.call('HSET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end

local periodic_wm_last = redis.call('HGET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
if periodic_wm_last then
periodic_wm_last = tonumber(periodic_wm_last)
if wm > periodic_wm_last then
redis.call('HSET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end
else
redis.call('HSET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
end
end
end

return rets
Loading

0 comments on commit 4577b40

Please sign in to comment.