From ee7a73508d84eb36468d842ebc12f1928f984533 Mon Sep 17 00:00:00 2001 From: shlomibitton <60430976+shlomibitton@users.noreply.github.com> Date: Wed, 28 Apr 2021 04:36:15 +0300 Subject: [PATCH] [flex-counters] Delay flex counters stats init for faster boot time (#1646) What I did Update flex counters DB with counters stats only when counters are enabled. As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch. Why I did it Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities. This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general). The flex counters are not crucial at boot time, we can delay it to the end of the boot process. How I verified it Reboot a switch and observer the flex counters DB populated after counters are enabled. --- orchagent/flexcounterorch.cpp | 40 +++++++++- orchagent/flexcounterorch.h | 4 + orchagent/orchdaemon.cpp | 6 +- orchagent/portsorch.cpp | 83 ++++++++++++++++---- orchagent/portsorch.h | 9 +++ tests/mock_tests/mock_orchagent_main.h | 3 + tests/mock_tests/portsorch_ut.cpp | 9 +++ tests/test_flex_counters.py | 102 +++++++++++++++++++++++++ tests/test_pg_drop_counter.py | 2 + 9 files changed, 241 insertions(+), 17 deletions(-) create mode 100644 tests/test_flex_counters.py diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index a10343311306..73451337ee5a 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -16,6 +16,11 @@ extern IntfsOrch *gIntfsOrch; extern BufferOrch *gBufferOrch; #define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK" +#define PORT_KEY "PORT" +#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP" +#define QUEUE_KEY "QUEUE" +#define PG_WATERMARK_KEY "PG_WATERMARK" +#define RIF_KEY "RIF" unordered_map flexCounterGroupMap = { @@ -87,6 +92,16 @@ void FlexCounterOrch::doTask(Consumer &consumer) } else if(field == FLEX_COUNTER_STATUS_FIELD) { + if((key == PORT_KEY) && (value == "enable")) + { + gPortsOrch->generatePortCounterMap(); + m_port_counter_enabled = true; + } + if((key == PORT_BUFFER_DROP_KEY) && (value == "enable")) + { + gPortsOrch->generatePortBufferDropCounterMap(); + m_port_buffer_drop_counter_enabled = true; + } // Currently, the counters are disabled for polling by default // The queue maps will be generated as soon as counters are enabled for polling // Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains @@ -101,9 +116,18 @@ void FlexCounterOrch::doTask(Consumer &consumer) // This can be because generateQueueMap() installs a fundamental list of queue stats // that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked // into the same function as they may not be counted as fundamental - gPortsOrch->generateQueueMap(); - gPortsOrch->generatePriorityGroupMap(); - gIntfsOrch->generateInterfaceMap(); + if((key == QUEUE_KEY) && (value == "enable")) + { + gPortsOrch->generateQueueMap(); + } + if((key == PG_WATERMARK_KEY) && (value == "enable")) + { + gPortsOrch->generatePriorityGroupMap(); + } + if((key == RIF_KEY) && (value == "enable")) + { + gIntfsOrch->generateInterfaceMap(); + } // Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) { @@ -124,3 +148,13 @@ void FlexCounterOrch::doTask(Consumer &consumer) consumer.m_toSync.erase(it++); } } + +bool FlexCounterOrch::getPortCountersState() + { + return m_port_counter_enabled; + } + +bool FlexCounterOrch::getPortBufferDropCountersState() + { + return m_port_buffer_drop_counter_enabled; + } diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 27f6e64ab1ad..a9750aa3b8b9 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch void doTask(Consumer &consumer); FlexCounterOrch(swss::DBConnector *db, std::vector &tableNames); virtual ~FlexCounterOrch(void); + bool getPortCountersState(); + bool getPortBufferDropCountersState(); private: std::shared_ptr m_flexCounterDb = nullptr; std::shared_ptr m_flexCounterGroupTable = nullptr; + bool m_port_counter_enabled = false; + bool m_port_buffer_drop_counter_enabled = false; }; #endif diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index f3c28d218fc0..abc23cf78384 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -339,7 +339,11 @@ bool OrchDaemon::init() CFG_FLEX_COUNTER_TABLE_NAME }; - m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables)); + auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables); + m_orchList.push_back(flexCounterOrch); + + gDirectory.set(flexCounterOrch); + gDirectory.set(gPortsOrch); vector pfc_wd_tables = { CFG_PFC_WD_TABLE_NAME diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 335d1d8d35a6..9156e2a8c08f 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -233,9 +233,9 @@ static char* hostif_vlan_tag[] = { */ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames, DBConnector *chassisAppDb) : Orch(db, tableNames), - port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), - port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true), - queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true) + port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false), + port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false), + queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false) { SWSS_LOG_ENTER(); @@ -2131,19 +2131,21 @@ bool PortsOrch::initPort(const string &alias, const int index, const set &l vector fields; fields.push_back(tuple); m_counterTable->set("", fields); + // Install a flex counter for this port to track stats - std::unordered_set counter_stats; - for (const auto& it: port_stat_ids) + auto flex_counters_orch = gDirectory.get(); + /* Delay installing the counters if they are yet enabled + If they are enabled, install the counters immediately */ + if (flex_counters_orch->getPortCountersState()) { - counter_stats.emplace(sai_serialize_port_stat(it)); + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats); } - port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats); - std::unordered_set port_buffer_drop_stats; - for (const auto& it: port_buffer_drop_stat_ids) + if (flex_counters_orch->getPortBufferDropCountersState()) { - port_buffer_drop_stats.emplace(sai_serialize_port_stat(it)); + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); } - port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); PortUpdate update = { p, true }; notify(SUBJECT_TYPE_PORT_CHANGE, static_cast(&update)); @@ -2176,8 +2178,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id) p.m_port_id = port_id; /* remove port from flex_counter_table for updating counters */ - port_stat_manager.clearCounterIdList(p.m_port_id); - + auto flex_counters_orch = gDirectory.get(); + if ((flex_counters_orch->getPortCountersState())) + { + port_stat_manager.clearCounterIdList(p.m_port_id); + } /* remove port name map from counter table */ m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias); @@ -4699,6 +4704,38 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port) CounterCheckOrch::getInstance().addPort(port); } +void PortsOrch::generatePortCounterMap() +{ + if (m_isPortCounterMapGenerated) + { + return; + } + + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats); + } + + m_isPortCounterMapGenerated = true; +} + +void PortsOrch::generatePortBufferDropCounterMap() +{ + if (m_isPortBufferDropCounterMapGenerated) + { + return; + } + + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats); + } + + m_isPortBufferDropCounterMapGenerated = true; +} + void PortsOrch::doTask(NotificationConsumer &consumer) { SWSS_LOG_ENTER(); @@ -5565,6 +5602,26 @@ bool PortsOrch::setVoqInbandIntf(string &alias, string &type) return true; } +std::unordered_set PortsOrch::generateCounterStats(const string& type) +{ + std::unordered_set counter_stats; + if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_buffer_drop_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + return counter_stats; +} + void PortsOrch::voqSyncAddLag (Port &lag) { int32_t switch_id = lag.m_system_lag_info.switch_id; diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index a2d669c26384..877b69caccb4 100755 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -12,6 +12,7 @@ #include "flex_counter_manager.h" #include "gearboxutils.h" #include "saihelper.h" +#include "flexcounterorch.h" #include "lagid.h" @@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject void generateQueueMap(); void generatePriorityGroupMap(); + void generatePortCounterMap(); + void generatePortBufferDropCounterMap(); void refreshPortStatus(); bool removeAclTableGroup(const Port &p); @@ -283,6 +286,9 @@ class PortsOrch : public Orch, public Subject bool m_isPriorityGroupMapGenerated = false; void generatePriorityGroupMapPerPort(const Port& port); + bool m_isPortCounterMapGenerated = false; + bool m_isPortBufferDropCounterMapGenerated = false; + bool setPortAutoNeg(sai_object_id_t id, int an); bool setPortFecMode(sai_object_id_t id, int fec); @@ -307,6 +313,9 @@ class PortsOrch : public Orch, public Subject sai_uint32_t m_systemPortCount; bool getSystemPorts(); bool addSystemPorts(); + + std::unordered_set generateCounterStats(const string& type); + unique_ptr m_tableVoqSystemLagTable; unique_ptr
m_tableVoqSystemLagMemberTable; void voqSyncAddLag(Port &lag); diff --git a/tests/mock_tests/mock_orchagent_main.h b/tests/mock_tests/mock_orchagent_main.h index 587055b87e55..181ebac88973 100644 --- a/tests/mock_tests/mock_orchagent_main.h +++ b/tests/mock_tests/mock_orchagent_main.h @@ -15,6 +15,8 @@ #include "vxlanorch.h" #include "policerorch.h" #include "fgnhgorch.h" +#include "flexcounterorch.h" +#include "directory.h" extern int gBatchSize; extern bool gSwssRecord; @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch; extern MirrorOrch *gMirrorOrch; extern BufferOrch *gBufferOrch; extern VRFOrch *gVrfOrch; +extern Directory gDirectory; extern sai_acl_api_t *sai_acl_api; extern sai_switch_api_t *sai_switch_api; diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index 778e290d6925..ef5e4777eb08 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -145,7 +145,16 @@ namespace portsorch_test }; ASSERT_EQ(gPortsOrch, nullptr); + + vector flex_counter_tables = { + CFG_FLEX_COUNTER_TABLE_NAME + }; + + auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables); + gDirectory.set(flexCounterOrch); + gPortsOrch = new PortsOrch(m_app_db.get(), ports_tables, m_chassis_app_db.get()); + vector buffer_tables = { APP_BUFFER_POOL_TABLE_NAME, APP_BUFFER_PROFILE_TABLE_NAME, APP_BUFFER_QUEUE_TABLE_NAME, diff --git a/tests/test_flex_counters.py b/tests/test_flex_counters.py new file mode 100644 index 000000000000..fca1c0527f0e --- /dev/null +++ b/tests/test_flex_counters.py @@ -0,0 +1,102 @@ +import time +import pytest + +# Counter keys on ConfigDB +PORT_KEY = "PORT" +QUEUE_KEY = "QUEUE" +RIF_KEY = "RIF" +BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK" +PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP" +PG_WATERMARK_KEY = "PG_WATERMARK" + +# Counter stats on FlexCountersDB +PORT_STAT = "PORT_STAT_COUNTER" +QUEUE_STAT = "QUEUE_STAT_COUNTER" +RIF_STAT = "RIF_STAT_COUNTER" +BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER" +PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT" +PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER" + +# Counter maps on CountersDB +PORT_MAP = "COUNTERS_PORT_NAME_MAP" +QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP" +RIF_MAP = "COUNTERS_RIF_NAME_MAP" +BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP" +PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP" +PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP" + +NUMBER_OF_RETRIES = 10 + +counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP], + "queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP], + "rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP], + "buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP], + "port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP], + "pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]} + +class TestFlexCounters(object): + + def setup_dbs(self, dvs): + self.config_db = dvs.get_config_db() + self.flex_db = dvs.get_flex_db() + self.counters_db = dvs.get_counters_db() + + def wait_for_table(self, table): + for retry in range(NUMBER_OF_RETRIES): + counters_keys = self.counters_db.db_connection.hgetall(table) + if len(counters_keys) > 0: + return + else: + time.sleep(1) + + assert False, str(table) + " not created in Counters DB" + + def wait_for_id_list(self, stat, name, oid): + for retry in range(NUMBER_OF_RETRIES): + id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items() + if len(id_list) > 0: + return + else: + time.sleep(1) + + assert False, "No ID list for counter " + str(name) + + def verify_no_flex_counters_tables(self, counter_stat): + counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat) + assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group" + + def verify_flex_counters_populated(self, map, stat): + counters_keys = self.counters_db.db_connection.hgetall(map) + for counter_entry in counters_keys.items(): + name = counter_entry[0] + oid = counter_entry[1] + self.wait_for_id_list(stat, name, oid) + + def enable_flex_counter_group(self, group, map): + group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"} + self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry) + self.wait_for_table(map) + + @pytest.mark.parametrize("counter_type", counter_type_dict.keys()) + def test_flex_counters(self, dvs, counter_type): + """ + The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled. + After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB. + For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test. + """ + self.setup_dbs(dvs) + counter_key = counter_type_dict[counter_type][0] + counter_stat = counter_type_dict[counter_type][1] + counter_map = counter_type_dict[counter_type][2] + + self.verify_no_flex_counters_tables(counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL") + self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL") + + self.enable_flex_counter_group(counter_key, counter_map) + self.verify_flex_counters_populated(counter_map, counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL") diff --git a/tests/test_pg_drop_counter.py b/tests/test_pg_drop_counter.py index dedee82c1afb..1cdd83474721 100644 --- a/tests/test_pg_drop_counter.py +++ b/tests/test_pg_drop_counter.py @@ -65,12 +65,14 @@ def set_up_flex_counter(self): fc_status_enable = {"FLEX_COUNTER_STATUS": "enable"} self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_DROP", fc_status_enable) + self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK", fc_status_enable) def clear_flex_counter(self): for pg in self.pgs: self.flex_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP_STAT_COUNTER:{}".format(pg)) self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP") + self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK") def test_pg_drop_counters(self, dvs):