From 4577b40d5ad3c752d429d6dfd344f63e3e10299e Mon Sep 17 00:00:00 2001
From: Wenda Ni
Date: Fri, 14 Jun 2019 13:36:09 -0700
Subject: [PATCH] Add buffer pool watermark support (#853)

* Write fields to FLEX_COUNTER_GROUP_TABLE at the construction of a BufferOrch object:
      "POLL_INTERVAL"
      "BUFFER_POOL_PLUGIN_LIST"
      "STATS_MODE"

Signed-off-by: Wenda Ni

* Update the buffer pool name-to-oid mapping in COUNTERS_DB upon the set and del of its oid

Signed-off-by: Wenda Ni

* Push the buffer pool watermark COUNTER_ID_LIST to FLEX_COUNTER_TABLE

Signed-off-by: Wenda Ni

* Implement user clear logic for buffer pool watermarks

Signed-off-by: Wenda Ni

* Add periodic clear for buffer pool watermarks

Signed-off-by: Wenda Ni

* Add lua script for watermark_bufferpool

Signed-off-by: Wenda Ni

* Fix syntax error in buffer pool watermark lua script

Signed-off-by: Wenda Ni

* Fix compile error in watermarkorch.cpp

Signed-off-by: Wenda Ni

* Fixes from DUT verification

Signed-off-by: Wenda Ni

* Add 6000 to read-only polling mode

Signed-off-by: Wenda Ni

* Touch up existing code

Signed-off-by: Wenda Ni

* Remove debugging symbols

Signed-off-by: Wenda Ni

* Address comments

Signed-off-by: Wenda Ni

* Address comments

Signed-off-by: Wenda Ni
---
 orchagent/Makefile.am              |   3 +-
 orchagent/bufferorch.cpp           | 101 +++++++++++++++++++++++++++--
 orchagent/bufferorch.h             |  19 +++++-
 orchagent/flexcounterorch.cpp      |  26 +++++++-
 orchagent/watermark_bufferpool.lua |  63 ++++++++++++++++++
 orchagent/watermarkorch.cpp        |  44 ++++++++++---
 orchagent/watermarkorch.h          |   1 +
 7 files changed, 241 insertions(+), 16 deletions(-)
 create mode 100644 orchagent/watermark_bufferpool.lua

diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am
index 90e56a06e31f..0fdbba21f2f6 100644
--- a/orchagent/Makefile.am
+++ b/orchagent/Makefile.am
@@ -11,7 +11,8 @@ dist_swss_DATA = \
 	pfc_detect_nephos.lua \
 	pfc_restore.lua \
 	watermark_queue.lua \
-	watermark_pg.lua
+	watermark_pg.lua \
+	watermark_bufferpool.lua
 
 bin_PROGRAMS = orchagent routeresync orchagent_restart_check
 
diff --git a/orchagent/bufferorch.cpp b/orchagent/bufferorch.cpp
index 9de842142d4d..e35b4c7f48d6 100644
--- a/orchagent/bufferorch.cpp
+++ b/orchagent/bufferorch.cpp
@@ -1,11 +1,13 @@
 #include "tokenize.h"
-
 #include "bufferorch.h"
 #include "logger.h"
+#include "sai_serialize.h"
 
 #include
 #include
 
+using namespace std;
+
 extern sai_port_api_t *sai_port_api;
 extern sai_queue_api_t *sai_queue_api;
 extern sai_switch_api_t *sai_switch_api;
@@ -14,7 +16,13 @@ extern sai_buffer_api_t *sai_buffer_api;
 extern PortsOrch *gPortsOrch;
 extern sai_object_id_t gSwitchId;
 
-using namespace std;
+#define BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS "10000"
+
+
+static const vector<sai_buffer_pool_stat_t> bufferPoolWatermarkStatIds =
+{
+    SAI_BUFFER_POOL_STAT_WATERMARK_BYTES,
+};
 
 type_map BufferOrch::m_buffer_type_maps = {
     {CFG_BUFFER_POOL_TABLE_NAME, new object_map()},
     {CFG_BUFFER_PORT_EGRESS_PROFILE_LIST_NAME, new object_map()}
 };
 
-BufferOrch::BufferOrch(DBConnector *db, vector<string> &tableNames) : Orch(db, tableNames)
+BufferOrch::BufferOrch(DBConnector *db, vector<string> &tableNames) :
+    Orch(db, tableNames),
+    m_flexCounterDb(new DBConnector(FLEX_COUNTER_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
+    m_flexCounterTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_TABLE)),
+    m_flexCounterGroupTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_GROUP_TABLE)),
+    m_countersDb(new DBConnector(COUNTERS_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
+    m_countersDbRedisClient(m_countersDb.get())
 {
     SWSS_LOG_ENTER();
     initTableHandlers();
     initBufferReadyLists(db);
+    initFlexCounterGroupTable();
 };
 
 void BufferOrch::initTableHandlers()
@@ -82,6 +97,32 @@ void BufferOrch::initBufferReadyList(Table& table)
     }
 }
 
+void BufferOrch::initFlexCounterGroupTable(void)
+{
+    string bufferPoolWmPluginName = "watermark_bufferpool.lua";
+
+    try
+    {
+        string bufferPoolLuaScript = swss::loadLuaScript(bufferPoolWmPluginName);
+        string bufferPoolWmSha = swss::loadRedisScript(m_countersDb.get(), bufferPoolLuaScript);
+
+        vector<FieldValueTuple> fvTuples;
+        fvTuples.emplace_back(BUFFER_POOL_PLUGIN_FIELD, bufferPoolWmSha);
+        fvTuples.emplace_back(POLL_INTERVAL_FIELD, BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS);
+
+        // TODO (work in progress):
+        // Some platforms do not support the watermark clear operation on a particular buffer pool.
+        // Invoke the SAI clear_stats API per pool to query this capability from the call's return status.
+        fvTuples.emplace_back(STATS_MODE_FIELD, STATS_MODE_READ_AND_CLEAR);
+
+        m_flexCounterGroupTable->set(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP, fvTuples);
+    }
+    catch (const runtime_error &e)
+    {
+        SWSS_LOG_ERROR("Failed to set the buffer pool watermark lua script and/or flex counter group. Runtime error: %s", e.what());
+    }
+}
+
 bool BufferOrch::isPortReady(const std::string& port_name) const
 {
     SWSS_LOG_ENTER();
@@ -105,6 +146,51 @@ bool BufferOrch::isPortReady(const std::string& port_name) const
     return result;
 }
 
+void BufferOrch::generateBufferPoolWatermarkCounterIdList(void)
+{
+    // This function is called by FlexCounterOrch when the field:value tuple "FLEX_COUNTER_STATUS":"enable"
+    // is received on the buffer pool watermark key under table "FLEX_COUNTER_GROUP_TABLE".
+    // Because the SubscriberStateTable listens to the entire keyspace of "BUFFER_POOL_WATERMARK", any update
+    // to a field:value tuple under the key "BUFFER_POOL_WATERMARK" causes this tuple to be heard again.
+    // To avoid resyncing the counter ID list a second time, we introduce a member variable to mark whether
+    // this operation has already been done.
+    if (m_isBufferPoolWatermarkCounterIdListGenerated)
+    {
+        return;
+    }
+
+    // Serialize the SAI watermark stats to a string, separated by commas
+    string statList;
+    for (const auto &it : bufferPoolWatermarkStatIds)
+    {
+        statList += (sai_serialize_buffer_pool_stat(it) + list_item_delimiter);
+    }
+    if (!statList.empty())
+    {
+        statList.pop_back();
+    }
+
+    vector<FieldValueTuple> fvTuples;
+    fvTuples.emplace_back(BUFFER_POOL_COUNTER_ID_LIST, statList);
+
+    // Push the buffer pool watermark COUNTER_ID_LIST to FLEX_COUNTER_TABLE on a per-buffer-pool basis
+    for (const auto &it : *(m_buffer_type_maps[CFG_BUFFER_POOL_TABLE_NAME]))
+    {
+        string key = BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP ":" + sai_serialize_object_id(it.second);
+        m_flexCounterTable->set(key, fvTuples);
+    }
+
+    m_isBufferPoolWatermarkCounterIdListGenerated = true;
+}
+
+const object_map &BufferOrch::getBufferPoolNameOidMap(void)
+{
+    // If different orchs run in different threads, the caller may need to grab a read lock
+    // before calling this function.
+    return *m_buffer_type_maps[CFG_BUFFER_POOL_TABLE_NAME];
+}
+
 task_process_status BufferOrch::processBufferPool(Consumer &consumer)
 {
     SWSS_LOG_ENTER();
@@ -209,6 +295,12 @@ task_process_status BufferOrch::processBufferPool(Consumer &consumer)
             }
             (*(m_buffer_type_maps[map_type_name]))[object_name] = sai_object;
             SWSS_LOG_NOTICE("Created buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
+            // Here we take the PFC watchdog approach to update the COUNTERS_DB metadata (e.g., PFC_WD_DETECTION_TIME per queue)
+            // at initialization (creation and registration phase).
+            // Specifically, we push the buffer pool name-to-oid mapping upon the creation of the oid.
+            // In the PG and queue cases, installing this mapping is deferred to FlexCounterOrch upon receiving field
+            // "FLEX_COUNTER_STATUS".
+            m_countersDbRedisClient.hset(COUNTERS_BUFFER_POOL_NAME_MAP, object_name, sai_serialize_object_id(sai_object));
         }
     }
     else if (op == DEL_COMMAND)
@@ -222,6 +314,7 @@ task_process_status BufferOrch::processBufferPool(Consumer &consumer)
         SWSS_LOG_NOTICE("Removed buffer pool %s with type %s", object_name.c_str(), map_type_name.c_str());
         auto it_to_delete = (m_buffer_type_maps[map_type_name])->find(object_name);
         (m_buffer_type_maps[map_type_name])->erase(it_to_delete);
+        m_countersDbRedisClient.hdel(COUNTERS_BUFFER_POOL_NAME_MAP, object_name);
     }
     else
     {
@@ -370,7 +463,7 @@ task_process_status BufferOrch::processBufferProfile(Consumer &consumer)
 }
 
 /*
-Input sample "BUFFER_QUEUE_TABLE:Ethernet4,Ethernet45:10-15"
+Input sample "BUFFER_QUEUE|Ethernet4,Ethernet45|10-15"
 */
 task_process_status BufferOrch::processQueue(Consumer &consumer)
 {
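
A minimal usage sketch (editorial, not part of the patch): with the hunks above applied, a client can resolve a buffer pool name to its SAI object id through the COUNTERS_BUFFER_POOL_NAME_MAP hash that processBufferPool() now maintains, and then read the watermark that the lua plugin folds into USER_WATERMARKS. The swss-common RedisClient calls mirror the ones used in the patch; the pool name "ingress_lossless_pool" and the oid value are hypothetical.

    #include <iostream>
    #include <string>
    #include "dbconnector.h"
    #include "redisclient.h"
    #include "schema.h"

    int main()
    {
        swss::DBConnector countersDb(COUNTERS_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
        swss::RedisClient redisClient(&countersDb);

        // Field written by BufferOrch::processBufferPool() upon pool creation
        auto oid = redisClient.hget("COUNTERS_BUFFER_POOL_NAME_MAP", "ingress_lossless_pool");
        if (!oid)
        {
            std::cerr << "pool not (yet) in the name map" << std::endl;
            return 1;
        }

        // Hash maintained by watermark_bufferpool.lua; *oid looks like "oid:0x18000000000c78"
        auto wm = redisClient.hget("USER_WATERMARKS:" + *oid, "SAI_BUFFER_POOL_STAT_WATERMARK_BYTES");
        std::cout << (wm ? *wm : "N/A") << std::endl;
        return 0;
    }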
diff --git a/orchagent/bufferorch.h b/orchagent/bufferorch.h
index f5811fdbd28f..bbc3448376a9 100644
--- a/orchagent/bufferorch.h
+++ b/orchagent/bufferorch.h
@@ -6,6 +6,10 @@
 #include
 #include "orch.h"
 #include "portsorch.h"
+#include "redisapi.h"
+#include "redisclient.h"
+
+#define BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP "BUFFER_POOL_WATERMARK_STAT_COUNTER"
 
 const string buffer_size_field_name = "size";
 const string buffer_pool_type_field_name = "type";
@@ -30,16 +34,20 @@ class BufferOrch : public Orch
     BufferOrch(DBConnector *db, vector<string> &tableNames);
     bool isPortReady(const std::string& port_name) const;
     static type_map m_buffer_type_maps;
+    void generateBufferPoolWatermarkCounterIdList(void);
+    const object_map &getBufferPoolNameOidMap(void);
+
 private:
     typedef task_process_status (BufferOrch::*buffer_table_handler)(Consumer& consumer);
     typedef map<string, buffer_table_handler> buffer_table_handler_map;
     typedef pair<string, buffer_table_handler> buffer_handler_pair;
 
-    virtual void doTask() override;
+    void doTask() override;
     virtual void doTask(Consumer& consumer);
     void initTableHandlers();
     void initBufferReadyLists(DBConnector *db);
     void initBufferReadyList(Table& table);
+    void initFlexCounterGroupTable(void);
     task_process_status processBufferPool(Consumer &consumer);
     task_process_status processBufferProfile(Consumer &consumer);
     task_process_status processQueue(Consumer &consumer);
@@ -50,6 +58,15 @@ class BufferOrch : public Orch
     buffer_table_handler_map m_bufferHandlerMap;
     std::unordered_map<std::string, bool> m_ready_list;
     std::unordered_map<std::string, std::vector<std::string>> m_port_ready_list_ref;
+
+    unique_ptr<DBConnector> m_flexCounterDb;
+    unique_ptr<ProducerTable> m_flexCounterGroupTable;
+    unique_ptr<ProducerTable> m_flexCounterTable;
+
+    unique_ptr<DBConnector> m_countersDb;
+    RedisClient m_countersDbRedisClient;
+
+    bool m_isBufferPoolWatermarkCounterIdListGenerated = false;
 };
 
 #endif /* SWSS_BUFFORCH_H */

diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp
index 2935df97ba23..1e057bfb5b4d 100644
--- a/orchagent/flexcounterorch.cpp
+++ b/orchagent/flexcounterorch.cpp
@@ -6,11 +6,15 @@
 #include "redisclient.h"
 #include "sai_serialize.h"
 #include "pfcwdorch.h"
+#include "bufferorch.h"
 
 extern sai_port_api_t *sai_port_api;
 
 extern PortsOrch *gPortsOrch;
 extern IntfsOrch *gIntfsOrch;
+extern BufferOrch *gBufferOrch;
+
+#define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK"
 
 unordered_map<string, string> flexCounterGroupMap =
 {
@@ -19,6 +23,7 @@ unordered_map<string, string> flexCounterGroupMap =
     {"PFCWD", PFC_WD_FLEX_COUNTER_GROUP},
     {"QUEUE_WATERMARK", QUEUE_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
     {"PG_WATERMARK", PG_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
+    {BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP},
     {"RIF", RIF_STAT_COUNTER_FLEX_COUNTER_GROUP},
 };
 
@@ -76,11 +81,28 @@ void FlexCounterOrch::doTask(Consumer &consumer)
             }
             else if(field == FLEX_COUNTER_STATUS_FIELD)
             {
-                // Currently the counters are disabled by default
-                // The queue maps will be generated as soon as counters are enabled
+                // Currently, the counters are disabled for polling by default.
+                // The queue maps are generated as soon as the counters are enabled for polling.
+                // Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains
+                // the list of SAI stats/attributes of polling interest, to the FLEX_COUNTER_DB, under the
+                // additional condition that the polling interval at that time is a positive, nonzero value;
+                // this condition is automatically satisfied upon the creation of the orch object that requires
+                // the syncd flex counter polling service.
+                // This postponement is introduced by design to accelerate the initialization process.
+                //
+                // generateQueueMap() is called whenever a field "FLEX_COUNTER_STATUS" event is heard,
+                // regardless of whether the key is "QUEUE" or whether the value is "enable" or "disable".
+                // This may be because generateQueueMap() installs a fundamental list of queue stats
+                // that need to be polled, so it remains an open question whether queue watermark stats
+                // should be piggybacked into the same function, as they may not count as fundamental.
                 gPortsOrch->generateQueueMap();
                 gPortsOrch->generatePriorityGroupMap();
                 gIntfsOrch->generateInterfaceMap();
+                // Install COUNTER_ID_LIST/ATTR_ID_LIST only upon a buffer pool watermark enable event
+                if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
+                {
+                    gBufferOrch->generateBufferPoolWatermarkCounterIdList();
+                }
 
                 vector<FieldValueTuple> fieldValues;
                 fieldValues.emplace_back(FLEX_COUNTER_STATUS_FIELD, value);
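
A minimal sketch (editorial, not part of the patch) of how this path is exercised: FlexCounterOrch::doTask() above reacts to a "FLEX_COUNTER_STATUS" field arriving on the "BUFFER_POOL_WATERMARK" key. Assuming FlexCounterOrch consumes the FLEX_COUNTER_TABLE of CONFIG_DB, as the SONiC counterpoll utility does (this table binding is not shown in this diff), enabling the group could look like:

    #include <vector>
    #include "dbconnector.h"
    #include "schema.h"
    #include "table.h"

    int main()
    {
        swss::DBConnector configDb(CONFIG_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
        swss::Table flexCounterCfg(&configDb, "FLEX_COUNTER_TABLE");

        // Heard by FlexCounterOrch::doTask(); the key and value match the new
        // BUFFER_POOL_WATERMARK_KEY handling, which then calls
        // gBufferOrch->generateBufferPoolWatermarkCounterIdList()
        std::vector<swss::FieldValueTuple> fvs = {{"FLEX_COUNTER_STATUS", "enable"}};
        flexCounterCfg.set("BUFFER_POOL_WATERMARK", fvs);
        return 0;
    }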
diff --git a/orchagent/watermark_bufferpool.lua b/orchagent/watermark_bufferpool.lua
new file mode 100644
index 000000000000..2466ea79c251
--- /dev/null
+++ b/orchagent/watermark_bufferpool.lua
@@ -0,0 +1,63 @@
+-- KEYS - buffer pool IDs
+-- ARGV[1] - counters db index
+-- ARGV[2] - counters table name
+-- ARGV[3] - poll time interval
+-- return nothing for now
+
+local counters_db = ARGV[1]
+local counters_table_name = 'COUNTERS'
+
+local user_table_name = 'USER_WATERMARKS'
+local persistent_table_name = 'PERSISTENT_WATERMARKS'
+local periodic_table_name = 'PERIODIC_WATERMARKS'
+
+local sai_buffer_pool_watermark_stat_name = 'SAI_BUFFER_POOL_STAT_WATERMARK_BYTES'
+
+local rets = {}
+
+redis.call('SELECT', counters_db)
+
+-- Iterate through each buffer pool oid
+local n = table.getn(KEYS)
+for i = n, 1, -1 do
+    -- Get the new watermark value from COUNTERS
+    local wm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
+    if wm then
+        wm = tonumber(wm)
+
+        -- Get the last value from *_WATERMARKS
+        local user_wm_last = redis.call('HGET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
+
+        -- Set the higher value to *_WATERMARKS
+        if user_wm_last then
+            user_wm_last = tonumber(user_wm_last)
+            if wm > user_wm_last then
+                redis.call('HSET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+            end
+        else
+            redis.call('HSET', user_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+        end
+
+        local persistent_wm_last = redis.call('HGET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
+        if persistent_wm_last then
+            persistent_wm_last = tonumber(persistent_wm_last)
+            if wm > persistent_wm_last then
+                redis.call('HSET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+            end
+        else
+            redis.call('HSET', persistent_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+        end
+
+        local periodic_wm_last = redis.call('HGET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name)
+        if periodic_wm_last then
+            periodic_wm_last = tonumber(periodic_wm_last)
+            if wm > periodic_wm_last then
+                redis.call('HSET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+            end
+        else
+            redis.call('HSET', periodic_table_name .. ':' .. KEYS[i], sai_buffer_pool_watermark_stat_name, wm)
+        end
+    end
+end
+
+return rets
diff --git a/orchagent/watermarkorch.cpp b/orchagent/watermarkorch.cpp
index 77f8aaa63673..0cf746dcd48c 100644
--- a/orchagent/watermarkorch.cpp
+++ b/orchagent/watermarkorch.cpp
@@ -3,6 +3,7 @@
 #include "portsorch.h"
 #include "notifier.h"
 #include "converter.h"
+#include "bufferorch.h"
 
 #define DEFAULT_TELEMETRY_INTERVAL 120
 
@@ -10,8 +11,10 @@
 #define CLEAR_PG_SHARED_REQUEST "PG_SHARED"
 #define CLEAR_QUEUE_SHARED_UNI_REQUEST "Q_SHARED_UNI"
 #define CLEAR_QUEUE_SHARED_MULTI_REQUEST "Q_SHARED_MULTI"
+#define CLEAR_BUFFER_POOL_REQUEST "BUFFER_POOL"
 
 extern PortsOrch *gPortsOrch;
+extern BufferOrch *gBufferOrch;
 
 WatermarkOrch::WatermarkOrch(DBConnector *db, const vector<string> &tables):
@@ -175,30 +178,36 @@ void WatermarkOrch::doTask(NotificationConsumer &consumer)
         return;
     }
 
-    if(data == CLEAR_PG_HEADROOM_REQUEST)
+    if (data == CLEAR_PG_HEADROOM_REQUEST)
     {
         clearSingleWm(table,
                       "SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES",
                       m_pg_ids);
     }
-    else if(data == CLEAR_PG_SHARED_REQUEST)
+    else if (data == CLEAR_PG_SHARED_REQUEST)
     {
         clearSingleWm(table,
                       "SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES",
                       m_pg_ids);
     }
-    else if(data == CLEAR_QUEUE_SHARED_UNI_REQUEST)
+    else if (data == CLEAR_QUEUE_SHARED_UNI_REQUEST)
    {
         clearSingleWm(table,
                       "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES",
                       m_unicast_queue_ids);
     }
-    else if(data == CLEAR_QUEUE_SHARED_MULTI_REQUEST)
+    else if (data == CLEAR_QUEUE_SHARED_MULTI_REQUEST)
     {
         clearSingleWm(table,
                       "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES",
                       m_multicast_queue_ids);
     }
+    else if (data == CLEAR_BUFFER_POOL_REQUEST)
+    {
+        clearSingleWm(table,
+                      "SAI_BUFFER_POOL_STAT_WATERMARK_BYTES",
+                      gBufferOrch->getBufferPoolNameOidMap());
+    }
     else
     {
         SWSS_LOG_WARN("Unknown watermark clear request data: %s", data.c_str());
@@ -232,10 +241,16 @@ void WatermarkOrch::doTask(SelectableTimer &timer)
         m_telemetryTimer->stop();
     }
 
-    clearSingleWm(m_periodicWatermarkTable.get(), "SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES", m_pg_ids);
-    clearSingleWm(m_periodicWatermarkTable.get(), "SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES", m_pg_ids);
-    clearSingleWm(m_periodicWatermarkTable.get(), "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_unicast_queue_ids);
-    clearSingleWm(m_periodicWatermarkTable.get(), "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_multicast_queue_ids);
+    clearSingleWm(m_periodicWatermarkTable.get(),
+                  "SAI_INGRESS_PRIORITY_GROUP_STAT_XOFF_ROOM_WATERMARK_BYTES", m_pg_ids);
+    clearSingleWm(m_periodicWatermarkTable.get(),
+                  "SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES", m_pg_ids);
+    clearSingleWm(m_periodicWatermarkTable.get(),
+                  "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_unicast_queue_ids);
+    clearSingleWm(m_periodicWatermarkTable.get(),
+                  "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_multicast_queue_ids);
+    clearSingleWm(m_periodicWatermarkTable.get(),
+                  "SAI_BUFFER_POOL_STAT_WATERMARK_BYTES", gBufferOrch->getBufferPoolNameOidMap());
     SWSS_LOG_DEBUG("Periodic watermark cleared by timer!");
     }
 }
@@ -288,3 +303,16 @@ void WatermarkOrch::clearSingleWm(Table *table, string wm_name, vector<sai_object_id_t> &obj_ids)
         table->set(sai_serialize_object_id(id), vfvt);
     }
 }
+
+void WatermarkOrch::clearSingleWm(Table *table, string wm_name, const object_map &nameOidMap)
+{
+    SWSS_LOG_ENTER();
+    SWSS_LOG_DEBUG("clear WM %s, for %zu obj ids", wm_name.c_str(), nameOidMap.size());
+
+    vector<FieldValueTuple> fvTuples = {{wm_name, "0"}};
+
+    for (const auto &it : nameOidMap)
+    {
+        table->set(sai_serialize_object_id(it.second), fvTuples);
+    }
+}
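
A sketch of issuing the new clear request (editorial, not part of the patch). WatermarkOrch receives these requests in doTask(NotificationConsumer&) above; the "WATERMARK_CLEAR_REQUEST" channel name and the "USER" table selector are assumptions drawn from the surrounding orchagent and CLI code, not from this diff:

    #include <vector>
    #include "dbconnector.h"
    #include "notificationproducer.h"
    #include "schema.h"
    #include "table.h"

    int main()
    {
        swss::DBConnector countersDb(COUNTERS_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);

        // Assumed channel name; see WatermarkOrch's NotificationConsumer setup
        swss::NotificationProducer clearRequest(&countersDb, "WATERMARK_CLEAR_REQUEST");

        // op selects which watermark table to clear; data matches the new
        // CLEAR_BUFFER_POOL_REQUEST ("BUFFER_POOL") handling above
        std::vector<swss::FieldValueTuple> values;
        clearRequest.send("USER", "BUFFER_POOL", values);
        return 0;
    }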
"SAI_INGRESS_PRIORITY_GROUP_STAT_SHARED_WATERMARK_BYTES", m_pg_ids); + clearSingleWm(m_periodicWatermarkTable.get(), + "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_unicast_queue_ids); + clearSingleWm(m_periodicWatermarkTable.get(), + "SAI_QUEUE_STAT_SHARED_WATERMARK_BYTES", m_multicast_queue_ids); + clearSingleWm(m_periodicWatermarkTable.get(), + "SAI_BUFFER_POOL_STAT_WATERMARK_BYTES", gBufferOrch->getBufferPoolNameOidMap()); SWSS_LOG_DEBUG("Periodic watermark cleared by timer!"); } } @@ -288,3 +303,16 @@ void WatermarkOrch::clearSingleWm(Table *table, string wm_name, vectorset(sai_serialize_object_id(id), vfvt); } } + +void WatermarkOrch::clearSingleWm(Table *table, string wm_name, const object_map &nameOidMap) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_DEBUG("clear WM %s, for %zu obj ids", wm_name.c_str(), nameOidMap.size()); + + vector fvTuples = {{wm_name, "0"}}; + + for (const auto &it : nameOidMap) + { + table->set(sai_serialize_object_id(it.second), fvTuples); + } +} diff --git a/orchagent/watermarkorch.h b/orchagent/watermarkorch.h index be3bfb43964e..27ac112eb157 100644 --- a/orchagent/watermarkorch.h +++ b/orchagent/watermarkorch.h @@ -35,6 +35,7 @@ class WatermarkOrch : public Orch void handleFcConfigUpdate(const std::string &key, const std::vector &fvt); void clearSingleWm(Table *table, string wm_name, vector &obj_ids); + void clearSingleWm(Table *table, string wm_name, const object_map &nameOidMap); shared_ptr getCountersTable(void) {