Skip to content

Commit

Permalink
[flex-counters] Delay flex counters stats init for faster boot time (s…
Browse files Browse the repository at this point in the history
…onic-net#1646)

What I did
Update flex counters DB with counters stats only when counters are enabled.
As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch.

Why I did it
Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities.
This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general).
The flex counters are not crucial at boot time, we can delay it to the end of the boot process.

How I verified it
Reboot a switch and observer the flex counters DB populated after counters are enabled.
  • Loading branch information
shlomibitton authored Apr 28, 2021
1 parent b962a60 commit ee7a735
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 17 deletions.
40 changes: 37 additions & 3 deletions orchagent/flexcounterorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ extern IntfsOrch *gIntfsOrch;
extern BufferOrch *gBufferOrch;

#define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK"
#define PORT_KEY "PORT"
#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP"
#define QUEUE_KEY "QUEUE"
#define PG_WATERMARK_KEY "PG_WATERMARK"
#define RIF_KEY "RIF"

unordered_map<string, string> flexCounterGroupMap =
{
Expand Down Expand Up @@ -87,6 +92,16 @@ void FlexCounterOrch::doTask(Consumer &consumer)
}
else if(field == FLEX_COUNTER_STATUS_FIELD)
{
if((key == PORT_KEY) && (value == "enable"))
{
gPortsOrch->generatePortCounterMap();
m_port_counter_enabled = true;
}
if((key == PORT_BUFFER_DROP_KEY) && (value == "enable"))
{
gPortsOrch->generatePortBufferDropCounterMap();
m_port_buffer_drop_counter_enabled = true;
}
// Currently, the counters are disabled for polling by default
// The queue maps will be generated as soon as counters are enabled for polling
// Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains
Expand All @@ -101,9 +116,18 @@ void FlexCounterOrch::doTask(Consumer &consumer)
// This can be because generateQueueMap() installs a fundamental list of queue stats
// that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked
// into the same function as they may not be counted as fundamental
gPortsOrch->generateQueueMap();
gPortsOrch->generatePriorityGroupMap();
gIntfsOrch->generateInterfaceMap();
if((key == QUEUE_KEY) && (value == "enable"))
{
gPortsOrch->generateQueueMap();
}
if((key == PG_WATERMARK_KEY) && (value == "enable"))
{
gPortsOrch->generatePriorityGroupMap();
}
if((key == RIF_KEY) && (value == "enable"))
{
gIntfsOrch->generateInterfaceMap();
}
// Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event
if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable"))
{
Expand All @@ -124,3 +148,13 @@ void FlexCounterOrch::doTask(Consumer &consumer)
consumer.m_toSync.erase(it++);
}
}

bool FlexCounterOrch::getPortCountersState()
{
return m_port_counter_enabled;
}

bool FlexCounterOrch::getPortBufferDropCountersState()
{
return m_port_buffer_drop_counter_enabled;
}
4 changes: 4 additions & 0 deletions orchagent/flexcounterorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch
void doTask(Consumer &consumer);
FlexCounterOrch(swss::DBConnector *db, std::vector<std::string> &tableNames);
virtual ~FlexCounterOrch(void);
bool getPortCountersState();
bool getPortBufferDropCountersState();

private:
std::shared_ptr<swss::DBConnector> m_flexCounterDb = nullptr;
std::shared_ptr<swss::ProducerTable> m_flexCounterGroupTable = nullptr;
bool m_port_counter_enabled = false;
bool m_port_buffer_drop_counter_enabled = false;
};

#endif
6 changes: 5 additions & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,11 @@ bool OrchDaemon::init()
CFG_FLEX_COUNTER_TABLE_NAME
};

m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables));
auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables);
m_orchList.push_back(flexCounterOrch);

gDirectory.set(flexCounterOrch);
gDirectory.set(gPortsOrch);

vector<string> pfc_wd_tables = {
CFG_PFC_WD_TABLE_NAME
Expand Down
83 changes: 70 additions & 13 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ static char* hostif_vlan_tag[] = {
*/
PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames, DBConnector *chassisAppDb) :
Orch(db, tableNames),
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true),
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true)
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false),
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false),
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false)
{
SWSS_LOG_ENTER();

Expand Down Expand Up @@ -2131,19 +2131,21 @@ bool PortsOrch::initPort(const string &alias, const int index, const set<int> &l
vector<FieldValueTuple> fields;
fields.push_back(tuple);
m_counterTable->set("", fields);

// Install a flex counter for this port to track stats
std::unordered_set<std::string> counter_stats;
for (const auto& it: port_stat_ids)
auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
/* Delay installing the counters if they are yet enabled
If they are enabled, install the counters immediately */
if (flex_counters_orch->getPortCountersState())
{
counter_stats.emplace(sai_serialize_port_stat(it));
auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats);
}
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats);
std::unordered_set<std::string> port_buffer_drop_stats;
for (const auto& it: port_buffer_drop_stat_ids)
if (flex_counters_orch->getPortBufferDropCountersState())
{
port_buffer_drop_stats.emplace(sai_serialize_port_stat(it));
auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);
}
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);

PortUpdate update = { p, true };
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
Expand Down Expand Up @@ -2176,8 +2178,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id)
p.m_port_id = port_id;

/* remove port from flex_counter_table for updating counters */
port_stat_manager.clearCounterIdList(p.m_port_id);

auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
if ((flex_counters_orch->getPortCountersState()))
{
port_stat_manager.clearCounterIdList(p.m_port_id);
}
/* remove port name map from counter table */
m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias);

Expand Down Expand Up @@ -4699,6 +4704,38 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port)
CounterCheckOrch::getInstance().addPort(port);
}

void PortsOrch::generatePortCounterMap()
{
if (m_isPortCounterMapGenerated)
{
return;
}

auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
for (const auto& it: m_portList)
{
port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats);
}

m_isPortCounterMapGenerated = true;
}

void PortsOrch::generatePortBufferDropCounterMap()
{
if (m_isPortBufferDropCounterMapGenerated)
{
return;
}

auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
for (const auto& it: m_portList)
{
port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats);
}

m_isPortBufferDropCounterMapGenerated = true;
}

void PortsOrch::doTask(NotificationConsumer &consumer)
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -5565,6 +5602,26 @@ bool PortsOrch::setVoqInbandIntf(string &alias, string &type)
return true;
}

std::unordered_set<std::string> PortsOrch::generateCounterStats(const string& type)
{
std::unordered_set<std::string> counter_stats;
if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP)
{
for (const auto& it: port_stat_ids)
{
counter_stats.emplace(sai_serialize_port_stat(it));
}
}
else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP)
{
for (const auto& it: port_buffer_drop_stat_ids)
{
counter_stats.emplace(sai_serialize_port_stat(it));
}
}
return counter_stats;
}

void PortsOrch::voqSyncAddLag (Port &lag)
{
int32_t switch_id = lag.m_system_lag_info.switch_id;
Expand Down
9 changes: 9 additions & 0 deletions orchagent/portsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "flex_counter_manager.h"
#include "gearboxutils.h"
#include "saihelper.h"
#include "flexcounterorch.h"
#include "lagid.h"


Expand Down Expand Up @@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject

void generateQueueMap();
void generatePriorityGroupMap();
void generatePortCounterMap();
void generatePortBufferDropCounterMap();

void refreshPortStatus();
bool removeAclTableGroup(const Port &p);
Expand Down Expand Up @@ -283,6 +286,9 @@ class PortsOrch : public Orch, public Subject
bool m_isPriorityGroupMapGenerated = false;
void generatePriorityGroupMapPerPort(const Port& port);

bool m_isPortCounterMapGenerated = false;
bool m_isPortBufferDropCounterMapGenerated = false;

bool setPortAutoNeg(sai_object_id_t id, int an);
bool setPortFecMode(sai_object_id_t id, int fec);

Expand All @@ -307,6 +313,9 @@ class PortsOrch : public Orch, public Subject
sai_uint32_t m_systemPortCount;
bool getSystemPorts();
bool addSystemPorts();

std::unordered_set<std::string> generateCounterStats(const string& type);

unique_ptr<Table> m_tableVoqSystemLagTable;
unique_ptr<Table> m_tableVoqSystemLagMemberTable;
void voqSyncAddLag(Port &lag);
Expand Down
3 changes: 3 additions & 0 deletions tests/mock_tests/mock_orchagent_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "vxlanorch.h"
#include "policerorch.h"
#include "fgnhgorch.h"
#include "flexcounterorch.h"
#include "directory.h"

extern int gBatchSize;
extern bool gSwssRecord;
Expand Down Expand Up @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch;
extern MirrorOrch *gMirrorOrch;
extern BufferOrch *gBufferOrch;
extern VRFOrch *gVrfOrch;
extern Directory<Orch*> gDirectory;

extern sai_acl_api_t *sai_acl_api;
extern sai_switch_api_t *sai_switch_api;
Expand Down
9 changes: 9 additions & 0 deletions tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,16 @@ namespace portsorch_test
};

ASSERT_EQ(gPortsOrch, nullptr);

vector<string> flex_counter_tables = {
CFG_FLEX_COUNTER_TABLE_NAME
};

auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables);
gDirectory.set(flexCounterOrch);

gPortsOrch = new PortsOrch(m_app_db.get(), ports_tables, m_chassis_app_db.get());

vector<string> buffer_tables = { APP_BUFFER_POOL_TABLE_NAME,
APP_BUFFER_PROFILE_TABLE_NAME,
APP_BUFFER_QUEUE_TABLE_NAME,
Expand Down
102 changes: 102 additions & 0 deletions tests/test_flex_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import time
import pytest

# Counter keys on ConfigDB
PORT_KEY = "PORT"
QUEUE_KEY = "QUEUE"
RIF_KEY = "RIF"
BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK"
PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP"
PG_WATERMARK_KEY = "PG_WATERMARK"

# Counter stats on FlexCountersDB
PORT_STAT = "PORT_STAT_COUNTER"
QUEUE_STAT = "QUEUE_STAT_COUNTER"
RIF_STAT = "RIF_STAT_COUNTER"
BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER"
PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT"
PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER"

# Counter maps on CountersDB
PORT_MAP = "COUNTERS_PORT_NAME_MAP"
QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP"
RIF_MAP = "COUNTERS_RIF_NAME_MAP"
BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP"
PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP"
PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP"

NUMBER_OF_RETRIES = 10

counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP],
"queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP],
"rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP],
"buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP],
"port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP],
"pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]}

class TestFlexCounters(object):

def setup_dbs(self, dvs):
self.config_db = dvs.get_config_db()
self.flex_db = dvs.get_flex_db()
self.counters_db = dvs.get_counters_db()

def wait_for_table(self, table):
for retry in range(NUMBER_OF_RETRIES):
counters_keys = self.counters_db.db_connection.hgetall(table)
if len(counters_keys) > 0:
return
else:
time.sleep(1)

assert False, str(table) + " not created in Counters DB"

def wait_for_id_list(self, stat, name, oid):
for retry in range(NUMBER_OF_RETRIES):
id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items()
if len(id_list) > 0:
return
else:
time.sleep(1)

assert False, "No ID list for counter " + str(name)

def verify_no_flex_counters_tables(self, counter_stat):
counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat)
assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group"

def verify_flex_counters_populated(self, map, stat):
counters_keys = self.counters_db.db_connection.hgetall(map)
for counter_entry in counters_keys.items():
name = counter_entry[0]
oid = counter_entry[1]
self.wait_for_id_list(stat, name, oid)

def enable_flex_counter_group(self, group, map):
group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"}
self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry)
self.wait_for_table(map)

@pytest.mark.parametrize("counter_type", counter_type_dict.keys())
def test_flex_counters(self, dvs, counter_type):
"""
The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled.
After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB.
For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test.
"""
self.setup_dbs(dvs)
counter_key = counter_type_dict[counter_type][0]
counter_stat = counter_type_dict[counter_type][1]
counter_map = counter_type_dict[counter_type][2]

self.verify_no_flex_counters_tables(counter_stat)

if counter_type == "rif_counter":
self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL")
self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL")

self.enable_flex_counter_group(counter_key, counter_map)
self.verify_flex_counters_populated(counter_map, counter_stat)

if counter_type == "rif_counter":
self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL")
Loading

0 comments on commit ee7a735

Please sign in to comment.