From fb06c32b2e25e6057514e9455e997ff7edcb7340 Mon Sep 17 00:00:00 2001 From: ngoc-do <72221689+ngoc-do@users.noreply.github.com> Date: Fri, 14 May 2021 14:36:04 -0700 Subject: [PATCH] [fabricportsorch] Add fabric support (#1459) This code is to add support for fabric asics and NPU with fabric ports enabled. What I did Create FabricOrchDaemon for fabric asics, instead of using OrchDaemon which is used for NPU. Create FabricPortsOrch to manage fabric ports. It collects information about port state, peer switch id and peer lane (stored in STATE_DB), sets up port stats and queue states. In future, it will also be used to enable/disable erroneous fabric ports. Fabric port and queue stats are setup to be collected via FlexCounters. --- orchagent/Makefile.am | 1 + orchagent/fabricportsorch.cpp | 268 ++++++++++++++++++++++++++++++++++ orchagent/fabricportsorch.h | 51 +++++++ orchagent/flexcounterorch.cpp | 32 +++- orchagent/main.cpp | 131 ++++++++++------- orchagent/orchdaemon.cpp | 56 ++++++- orchagent/orchdaemon.h | 21 ++- tests/conftest.py | 28 +++- tests/dvslib/dvs_database.py | 6 +- tests/mock_tests/Makefile.am | 1 + 10 files changed, 527 insertions(+), 68 deletions(-) create mode 100644 orchagent/fabricportsorch.cpp create mode 100644 orchagent/fabricportsorch.h diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index e0e17af01547..05a689f168f4 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -36,6 +36,7 @@ orchagent_SOURCES = \ neighorch.cpp \ intfsorch.cpp \ portsorch.cpp \ + fabricportsorch.cpp \ fgnhgorch.cpp \ copporch.cpp \ tunneldecaporch.cpp \ diff --git a/orchagent/fabricportsorch.cpp b/orchagent/fabricportsorch.cpp new file mode 100644 index 000000000000..a4644dfffc08 --- /dev/null +++ b/orchagent/fabricportsorch.cpp @@ -0,0 +1,268 @@ +#include "fabricportsorch.h" + +#include +#include +#include +#include + +#include "logger.h" +#include "schema.h" +#include "sai_serialize.h" +#include "timer.h" + +#define 
FABRIC_POLLING_INTERVAL_DEFAULT (30) +#define FABRIC_PORT_ERROR 0 +#define FABRIC_PORT_SUCCESS 1 +#define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER" +#define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000 +#define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER" +#define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000 +#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE" + +extern sai_object_id_t gSwitchId; +extern sai_switch_api_t *sai_switch_api; +extern sai_port_api_t *sai_port_api; + +const vector port_stat_ids = +{ + SAI_PORT_STAT_IF_IN_OCTETS, + SAI_PORT_STAT_IF_IN_ERRORS, + SAI_PORT_STAT_IF_IN_FABRIC_DATA_UNITS, + SAI_PORT_STAT_IF_IN_FEC_CORRECTABLE_FRAMES, + SAI_PORT_STAT_IF_IN_FEC_NOT_CORRECTABLE_FRAMES, + SAI_PORT_STAT_IF_IN_FEC_SYMBOL_ERRORS, + SAI_PORT_STAT_IF_OUT_OCTETS, + SAI_PORT_STAT_IF_OUT_FABRIC_DATA_UNITS, +}; + +static const vector queue_stat_ids = +{ + SAI_QUEUE_STAT_WATERMARK_LEVEL, + SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES, + SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL, +}; + +FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector &tableNames) : + Orch(appl_db, tableNames), + port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, + FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), + queue_stat_manager(FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, + FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), + m_timer(new SelectableTimer(timespec { .tv_sec = FABRIC_POLLING_INTERVAL_DEFAULT, .tv_nsec = 0 })) +{ + SWSS_LOG_ENTER(); + + SWSS_LOG_NOTICE( "FabricPortsOrch constructor" ); + + m_state_db = shared_ptr(new DBConnector("STATE_DB", 0)); + m_stateTable = unique_ptr(new Table(m_state_db.get(), FABRIC_PORT_TABLE)); + + m_counter_db = shared_ptr(new DBConnector("COUNTERS_DB", 0)); + m_laneQueueCounterTable = unique_ptr
<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP)); + m_lanePortCounterTable = unique_ptr<Table>
(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP)); + + m_flex_db = shared_ptr(new DBConnector("FLEX_COUNTER_DB", 0)); + m_flexCounterTable = unique_ptr(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE)); + + getFabricPortList(); + + auto executor = new ExecutableTimer(m_timer, this, "FABRIC_POLL"); + Orch::addExecutor(executor); + m_timer->start(); +} + +int FabricPortsOrch::getFabricPortList() +{ + SWSS_LOG_ENTER(); + + if (m_getFabricPortListDone) { + return FABRIC_PORT_SUCCESS; + } + + uint32_t i; + sai_status_t status; + sai_attribute_t attr; + + attr.id = SAI_SWITCH_ATTR_NUMBER_OF_FABRIC_PORTS; + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status); + return FABRIC_PORT_ERROR; + } + m_fabricPortCount = attr.value.u32; + SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount); + + vector fabric_port_list; + fabric_port_list.resize(m_fabricPortCount); + attr.id = SAI_SWITCH_ATTR_FABRIC_PORT_LIST; + attr.value.objlist.count = (uint32_t)fabric_port_list.size(); + attr.value.objlist.list = fabric_port_list.data(); + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get port list failure"); + } + + for (i = 0; i < m_fabricPortCount; i++) + { + sai_uint32_t lanes[1] = { 0 }; + attr.id = SAI_PORT_ATTR_HW_LANE_LIST; + attr.value.u32list.count = 1; + attr.value.u32list.list = lanes; + status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get port lane failure"); + } + int lane = attr.value.u32list.list[0]; + m_fabricLanePortMap[lane] = fabric_port_list[i]; + } + + generatePortStats(); + + m_getFabricPortListDone = true; + + updateFabricPortState(); + + return FABRIC_PORT_SUCCESS; +} + +bool FabricPortsOrch::allPortsReady() +{ + return 
m_getFabricPortListDone; +} + +void FabricPortsOrch::generatePortStats() +{ + // FIX_ME: This function is intended to install flex counters for port stats + // on fabric ports for fabric asics and voq asics (that connect + // to fabric asics via fabric ports). These counters will be + // installed in FLEX_COUNTER_DB, and queried by syncd and updated + // to COUNTERS_DB. + // However, BCM SAI has not yet been updated to support querying these + // port stats (the metrics listed in port_stat_ids). + // Also, BCM sets too low a value for "Max logical port count" (256), + // causing syncd to crash on voq asics that now include regular front + // panel ports, fabric ports, and multiple logical ports. + // So this function does nothing for now; we will re-add the + // code that installs port stats counters once BCM fully supports them. +} + +void FabricPortsOrch::generateQueueStats() +{ + if (m_isQueueStatsGenerated) return; + if (!m_getFabricPortListDone) return; + + // FIX_ME: Similar to generatePortStats(), generateQueueStats() is intended to install + // flex counters for queue stats on fabric ports for fabric asics and voq asics. + // However, BCM SAI currently doesn't fully support queue stats queries. + // Queries on queue type and index are not supported for fabric asics, while + // voq asics are not completely supported. + // So this function does nothing for now; we will re-add the + // code that installs queue stats counters once BCM fully supports them. 
+ + m_isQueueStatsGenerated = true; +} + +void FabricPortsOrch::updateFabricPortState() +{ + if (!m_getFabricPortListDone) return; + + SWSS_LOG_ENTER(); + + sai_status_t status; + sai_attribute_t attr; + + time_t now; + struct timespec time_now; + if (clock_gettime(CLOCK_MONOTONIC, &time_now) < 0) + { + return; + } + now = time_now.tv_sec; + + for (auto p : m_fabricLanePortMap) + { + int lane = p.first; + sai_object_id_t port = p.second; + + string key = "PORT" + to_string(lane); + std::vector values; + uint32_t remote_peer; + uint32_t remote_port; + + attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED; + status = sai_port_api->get_port_attribute(port, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + // Port may not be ready for query + SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status); + return; + } + + if (m_portStatus.find(lane) != m_portStatus.end() && + m_portStatus[lane] && !attr.value.booldata) + { + m_portDownCount[lane] ++; + m_portDownSeenLastTime[lane] = now; + } + m_portStatus[lane] = attr.value.booldata; + + if (m_portStatus[lane]) + { + attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_SWITCH_ID; + status = sai_port_api->get_port_attribute(port, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get remote id failure"); + } + remote_peer = attr.value.u32; + + attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX; + status = sai_port_api->get_port_attribute(port, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get remote port index failure"); + } + remote_port = attr.value.u32; + } + + values.emplace_back("STATUS", m_portStatus[lane] ? 
"up" : "down"); + if (m_portStatus[lane]) + { + values.emplace_back("REMOTE_MOD", to_string(remote_peer)); + values.emplace_back("REMOTE_PORT", to_string(remote_port)); + } + if (m_portDownCount[lane] > 0) + { + values.emplace_back("PORT_DOWN_COUNT", to_string(m_portDownCount[lane])); + values.emplace_back("PORT_DOWN_SEEN_LAST_TIME", + to_string(m_portDownSeenLastTime[lane])); + } + m_stateTable->set(key, values); + } +} + +void FabricPortsOrch::doTask() +{ +} + +void FabricPortsOrch::doTask(Consumer &consumer) +{ +} + +void FabricPortsOrch::doTask(swss::SelectableTimer &timer) +{ + SWSS_LOG_ENTER(); + + if (!m_getFabricPortListDone) + { + getFabricPortList(); + } + + if (m_getFabricPortListDone) + { + updateFabricPortState(); + } +} diff --git a/orchagent/fabricportsorch.h b/orchagent/fabricportsorch.h new file mode 100644 index 000000000000..c641ee566d6d --- /dev/null +++ b/orchagent/fabricportsorch.h @@ -0,0 +1,51 @@ +#ifndef SWSS_FABRICPORTSORCH_H +#define SWSS_FABRICPORTSORCH_H + +#include + +#include "orch.h" +#include "observer.h" +#include "observer.h" +#include "producertable.h" +#include "flex_counter_manager.h" + +class FabricPortsOrch : public Orch, public Subject +{ +public: + FabricPortsOrch(DBConnector *appl_db, vector &tableNames); + bool allPortsReady(); + void generateQueueStats(); + +private: + shared_ptr m_state_db; + shared_ptr m_counter_db; + shared_ptr m_flex_db; + + unique_ptr
<Table> m_stateTable; + unique_ptr<Table>
m_laneQueueCounterTable; + unique_ptr<Table>
m_lanePortCounterTable; + unique_ptr m_flexCounterTable; + + swss::SelectableTimer *m_timer = nullptr; + + FlexCounterManager port_stat_manager; + FlexCounterManager queue_stat_manager; + + sai_uint32_t m_fabricPortCount; + map m_fabricLanePortMap; + unordered_map m_portStatus; + unordered_map m_portDownCount; + unordered_map m_portDownSeenLastTime; + + bool m_getFabricPortListDone = false; + bool m_isQueueStatsGenerated = false; + int getFabricPortList(); + void generatePortStats(); + void updateFabricPortState(); + + void doTask() override; + void doTask(Consumer &consumer); + void doTask(swss::SelectableTimer &timer); +}; + +#endif /* SWSS_FABRICPORTSORCH_H */ diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index a10343311306..bf40d7c05fcb 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -1,6 +1,7 @@ #include #include "flexcounterorch.h" #include "portsorch.h" +#include "fabricportsorch.h" #include "select.h" #include "notifier.h" #include "sai_serialize.h" @@ -12,6 +13,7 @@ extern sai_port_api_t *sai_port_api; extern PortsOrch *gPortsOrch; +extern FabricPortsOrch *gFabricPortsOrch; extern IntfsOrch *gIntfsOrch; extern BufferOrch *gBufferOrch; @@ -51,7 +53,12 @@ void FlexCounterOrch::doTask(Consumer &consumer) { SWSS_LOG_ENTER(); - if (!gPortsOrch->allPortsReady()) + if (gPortsOrch && !gPortsOrch->allPortsReady()) + { + return; + } + + if (gFabricPortsOrch && !gFabricPortsOrch->allPortsReady()) { return; } @@ -101,15 +108,28 @@ void FlexCounterOrch::doTask(Consumer &consumer) // This can be because generateQueueMap() installs a fundamental list of queue stats // that need to be polled. 
So my doubt here is if queue watermark stats shall be piggybacked // into the same function as they may not be counted as fundamental - gPortsOrch->generateQueueMap(); - gPortsOrch->generatePriorityGroupMap(); - gIntfsOrch->generateInterfaceMap(); + if(gPortsOrch) + { + gPortsOrch->generateQueueMap(); + gPortsOrch->generatePriorityGroupMap(); + } + if(gPortsOrch) + { + gPortsOrch->generatePriorityGroupMap(); + } + if(gIntfsOrch) + { + gIntfsOrch->generateInterfaceMap(); + } // Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event - if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) + if (gBufferOrch && (key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) { gBufferOrch->generateBufferPoolWatermarkCounterIdList(); } - + if (gFabricPortsOrch) + { + gFabricPortsOrch->generateQueueStats(); + } vector fieldValues; fieldValues.emplace_back(FLEX_COUNTER_STATUS_FIELD, value); m_flexCounterGroupTable->set(flexCounterGroupMap[key], fieldValues); diff --git a/orchagent/main.cpp b/orchagent/main.cpp index a4ac21fe82fa..23d93bd291c0 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -446,7 +446,7 @@ int main(int argc, char **argv) // Get switch_type getCfgSwitchType(&config_db, gMySwitchType); - if (gMacAddress) + if (gMySwitchType != "fabric" && gMacAddress) { attr.id = SAI_SWITCH_ATTR_SRC_MAC_ADDRESS; memcpy(attr.value.mac, gMacAddress.getMac(), 6); @@ -513,6 +513,13 @@ int main(int argc, char **argv) //connection info in database_config.json chassis_app_db = make_shared("CHASSIS_APP_DB", 0, true); } + else if (gMySwitchType == "fabric") + { + SWSS_LOG_NOTICE("Switch type is fabric"); + attr.id = SAI_SWITCH_ATTR_TYPE; + attr.value.u32 = SAI_SWITCH_TYPE_FABRIC; + attrs.push_back(attr); + } /* Must be last Attribute */ attr.id = SAI_REDIS_SWITCH_ATTR_CONTEXT; @@ -527,81 +534,97 @@ int main(int argc, char **argv) } SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId); - /* Get switch source MAC address if not 
provided */ - if (!gMacAddress) + + if (gMySwitchType != "fabric") { - attr.id = SAI_SWITCH_ATTR_SRC_MAC_ADDRESS; + /* Get switch source MAC address if not provided */ + if (!gMacAddress) + { + attr.id = SAI_SWITCH_ATTR_SRC_MAC_ADDRESS; + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status); + exit(EXIT_FAILURE); + } + else + { + gMacAddress = attr.value.mac; + } + } + + /* Get the default virtual router ID */ + attr.id = SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID; + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); if (status != SAI_STATUS_SUCCESS) { - SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status); + SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status); exit(EXIT_FAILURE); } + + gVirtualRouterId = attr.value.oid; + SWSS_LOG_NOTICE("Get switch virtual router ID %" PRIx64, gVirtualRouterId); + + /* Get the NAT supported info */ + attr.id = SAI_SWITCH_ATTR_AVAILABLE_SNAT_ENTRY; + + status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_NOTICE("Failed to get the SNAT available entry count, rv:%d", status); + } else { - gMacAddress = attr.value.mac; + if (attr.value.u32 != 0) + { + gIsNatSupported = true; + } } - } - /* Get the default virtual router ID */ - attr.id = SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID; + /* Create a loopback underlay router interface */ + vector underlay_intf_attrs; - status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); - if (status != SAI_STATUS_SUCCESS) - { - SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status); - exit(EXIT_FAILURE); - } + sai_attribute_t underlay_intf_attr; + underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_VIRTUAL_ROUTER_ID; + underlay_intf_attr.value.oid = gVirtualRouterId; + underlay_intf_attrs.push_back(underlay_intf_attr); - gVirtualRouterId = 
attr.value.oid; - SWSS_LOG_NOTICE("Get switch virtual router ID %" PRIx64, gVirtualRouterId); + underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_TYPE; + underlay_intf_attr.value.s32 = SAI_ROUTER_INTERFACE_TYPE_LOOPBACK; + underlay_intf_attrs.push_back(underlay_intf_attr); - /* Get the NAT supported info */ - attr.id = SAI_SWITCH_ATTR_AVAILABLE_SNAT_ENTRY; + underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_MTU; + underlay_intf_attr.value.u32 = UNDERLAY_RIF_DEFAULT_MTU; + underlay_intf_attrs.push_back(underlay_intf_attr); - status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); - if (status != SAI_STATUS_SUCCESS) - { - SWSS_LOG_NOTICE("Failed to get the SNAT available entry count, rv:%d", status); - } - else - { - if (attr.value.u32 != 0) + status = sai_router_intfs_api->create_router_interface(&gUnderlayIfId, gSwitchId, (uint32_t)underlay_intf_attrs.size(), underlay_intf_attrs.data()); + if (status != SAI_STATUS_SUCCESS) { - gIsNatSupported = true; + SWSS_LOG_ERROR("Failed to create underlay router interface %d", status); + exit(EXIT_FAILURE); } - } - /* Create a loopback underlay router interface */ - vector underlay_intf_attrs; + SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId); - sai_attribute_t underlay_intf_attr; - underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_VIRTUAL_ROUTER_ID; - underlay_intf_attr.value.oid = gVirtualRouterId; - underlay_intf_attrs.push_back(underlay_intf_attr); + /* Initialize orchestration components */ - underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_TYPE; - underlay_intf_attr.value.s32 = SAI_ROUTER_INTERFACE_TYPE_LOOPBACK; - underlay_intf_attrs.push_back(underlay_intf_attr); - - underlay_intf_attr.id = SAI_ROUTER_INTERFACE_ATTR_MTU; - underlay_intf_attr.value.u32 = UNDERLAY_RIF_DEFAULT_MTU; - underlay_intf_attrs.push_back(underlay_intf_attr); + init_gearbox_phys(&appl_db); + } - status = sai_router_intfs_api->create_router_interface(&gUnderlayIfId, gSwitchId, 
(uint32_t)underlay_intf_attrs.size(), underlay_intf_attrs.data()); - if (status != SAI_STATUS_SUCCESS) + shared_ptr orchDaemon; + if (gMySwitchType != "fabric") { - SWSS_LOG_ERROR("Failed to create underlay router interface %d", status); - exit(EXIT_FAILURE); + orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_app_db.get()); + if (gMySwitchType == "voq") + { + orchDaemon->setFabricEnabled(true); + } + } + else + { + orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_app_db.get()); } - - SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId); - - /* Initialize orchestration components */ - - init_gearbox_phys(&appl_db); - - auto orchDaemon = make_shared(&appl_db, &config_db, &state_db, chassis_app_db.get()); if (!orchDaemon->init()) { diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index f3c28d218fc0..4f01e291a6d2 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -26,6 +26,7 @@ extern void syncd_apply_view(); * Global orch daemon variables */ PortsOrch *gPortsOrch; +FabricPortsOrch *gFabricPortsOrch; FdbOrch *gFdbOrch; IntfsOrch *gIntfsOrch; NeighOrch *gNeighOrch; @@ -49,6 +50,7 @@ OrchDaemon::OrchDaemon(DBConnector *applDb, DBConnector *configDb, DBConnector * m_chassisAppDb(chassisAppDb) { SWSS_LOG_ENTER(); + m_select = new Select(); } OrchDaemon::~OrchDaemon() @@ -68,6 +70,7 @@ OrchDaemon::~OrchDaemon() for(; it != m_orchList.rend(); ++it) { delete(*it); } + delete m_select; } bool OrchDaemon::init() @@ -333,7 +336,14 @@ bool OrchDaemon::init() m_orchList.push_back(mux_cb_orch); m_orchList.push_back(mux_st_orch); - m_select = new Select(); + if (m_fabricEnabled) + { + vector fabric_port_tables = { + // empty for now + }; + gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables); + m_orchList.push_back(gFabricPortsOrch); + } vector flex_counter_tables = { CFG_FLEX_COUNTER_TABLE_NAME @@ -570,7 +580,7 @@ void OrchDaemon::start() * Not doing this under 
Select::TIMEOUT condition because of * the existence of finer granularity ExecutableTimer with select */ - if (gSwitchOrch->checkRestartReady()) + if (gSwitchOrch && gSwitchOrch->checkRestartReady()) { bool ret = warmRestartCheck(); if (ret) @@ -583,10 +593,13 @@ void OrchDaemon::start() gSwitchOrch->setAgingFDB(0); // Disable FDB learning on all bridge ports - for (auto& pair: gPortsOrch->getAllPorts()) + if (gPortsOrch) { - auto& port = pair.second; - gPortsOrch->setBridgePortLearningFDB(port, SAI_BRIDGE_PORT_FDB_LEARNING_MODE_DISABLE); + for (auto& pair: gPortsOrch->getAllPorts()) + { + auto& port = pair.second; + gPortsOrch->setBridgePortLearningFDB(port, SAI_BRIDGE_PORT_FDB_LEARNING_MODE_DISABLE); + } } // Flush sairedis's redis pipeline @@ -744,3 +757,36 @@ bool OrchDaemon::warmRestartCheck() gSwitchOrch->restartCheckReply(op, data, values); return ret; } + +void OrchDaemon::addOrchList(Orch *o) +{ + m_orchList.push_back(o); +} + +FabricOrchDaemon::FabricOrchDaemon(DBConnector *applDb, DBConnector *configDb, DBConnector *stateDb, DBConnector *chassisAppDb) : + OrchDaemon(applDb, configDb, stateDb, chassisAppDb), + m_applDb(applDb), + m_configDb(configDb) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("FabricOrchDaemon starting..."); +} + +bool FabricOrchDaemon::init() +{ + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("FabricOrchDaemon init"); + + vector fabric_port_tables = { + // empty for now, I don't consume anything yet + }; + gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables); + addOrchList(gFabricPortsOrch); + + vector flex_counter_tables = { + CFG_FLEX_COUNTER_TABLE_NAME + }; + addOrchList(new FlexCounterOrch(m_configDb, flex_counter_tables)); + + return true; +} diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index 1d34574284fc..182941426531 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -7,6 +7,7 @@ #include "select.h" #include "portsorch.h" +#include "fabricportsorch.h" #include "intfsorch.h" #include 
"neighorch.h" #include "routeorch.h" @@ -42,23 +43,41 @@ class OrchDaemon OrchDaemon(DBConnector *, DBConnector *, DBConnector *, DBConnector *); ~OrchDaemon(); - bool init(); + virtual bool init(); void start(); bool warmRestoreAndSyncUp(); void getTaskToSync(vector &ts); bool warmRestoreValidation(); bool warmRestartCheck(); + + void addOrchList(Orch* o); + void setFabricEnabled(bool enabled) + { + m_fabricEnabled = enabled; + } private: DBConnector *m_applDb; DBConnector *m_configDb; DBConnector *m_stateDb; DBConnector *m_chassisAppDb; + bool m_fabricEnabled = false; + std::vector m_orchList; Select *m_select; void flush(); }; +class FabricOrchDaemon : public OrchDaemon +{ +public: + FabricOrchDaemon(DBConnector *, DBConnector *, DBConnector *, DBConnector *); + bool init() override; +private: + DBConnector *m_applDb; + DBConnector *m_configDb; +}; + #endif /* SWSS_ORCHDAEMON_H */ diff --git a/tests/conftest.py b/tests/conftest.py index 27d601bb8923..d707a3d08ab8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,6 +32,11 @@ # a dynamic number of ports. GitHub Issue: Azure/sonic-swss#1384. NUM_PORTS = 32 +# FIXME: Voq asics will have 16 fabric ports created (defined in Azure/sonic-buildimage#6185). +# Right now, we set FABRIC_NUM_PORTS to 0, and change to 16 when PR#6185 merges. PR#6185 can't +# be merged before this PR. Otherwise it will cause swss voq test failures. 
+FABRIC_NUM_PORTS = 0 + def ensure_system(cmd): rc, output = subprocess.getstatusoutput(cmd) if rc: @@ -467,6 +472,12 @@ def check_swss_ready(self, timeout: int = 300) -> None: """ num_ports = NUM_PORTS + # Voq and fabric asics have fabric ports enabled + self.get_config_db() + metadata = self.config_db.get_entry('DEVICE_METADATA|localhost', '') + if metadata.get('switch_type', 'npu') in ['voq', 'fabric']: + num_ports = NUM_PORTS + FABRIC_NUM_PORTS + # Verify that all ports have been initialized and configured app_db = self.get_app_db() startup_polling_config = PollingConfig(5, timeout, strict=True) @@ -479,7 +490,22 @@ def _polling_function(): # Verify that all ports have been created asic_db = self.get_asic_db() - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1) # +1 CPU Port + + # Verify that we have "at least" NUM_PORTS + FABRIC_NUM_PORTS, rather than the exact number. + # Right now, FABRIC_NUM_PORTS = 0. So it essentially waits for at least NUM_PORTS. + # This will allow us to merge Azure/sonic-buildimage#6185 that creates 16 fabric ports. + # When PR#6185 merges, FABRIC_NUM_PORTS should be 16, and so this verification (at least + # NUM_PORTS) still holds. + # Will update FABRIC_NUM_PORTS to 16, and revert to waiting for exactly NUM_PORTS + FABRIC_NUM_PORTS + # when PR#6185 merges. 
+ wait_at_least_n_keys = True + + asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1, wait_at_least_n_keys) # +1 CPU Port + + # Verify that fabric ports are monitored in STATE_DB + if metadata.get('switch_type', 'npu') in ['voq', 'fabric']: + self.get_state_db() + self.state_db.wait_for_n_keys("FABRIC_PORT_TABLE", FABRIC_NUM_PORTS, wait_at_least_n_keys) def net_cleanup(self) -> None: """Clean up network, remove extra links.""" diff --git a/tests/dvslib/dvs_database.py b/tests/dvslib/dvs_database.py index 7d268e5a6d69..f2657f75161b 100644 --- a/tests/dvslib/dvs_database.py +++ b/tests/dvslib/dvs_database.py @@ -330,6 +330,7 @@ def wait_for_n_keys( self, table_name: str, num_keys: int, + wait_at_least_n_keys: bool = False, polling_config: PollingConfig = PollingConfig(), failure_message: str = None, ) -> List[str]: @@ -348,7 +349,10 @@ def wait_for_n_keys( def access_function(): keys = self.get_keys(table_name) - return (len(keys) == num_keys, keys) + if wait_at_least_n_keys: + return (len(keys) >= num_keys, keys) + else: + return (len(keys) == num_keys, keys) status, result = wait_for_result( access_function, self._disable_strict_polling(polling_config) diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am index 82ceaa2d6f7b..75125acc78ef 100644 --- a/tests/mock_tests/Makefile.am +++ b/tests/mock_tests/Makefile.am @@ -41,6 +41,7 @@ tests_SOURCES = aclorch_ut.cpp \ $(top_srcdir)/orchagent/neighorch.cpp \ $(top_srcdir)/orchagent/intfsorch.cpp \ $(top_srcdir)/orchagent/portsorch.cpp \ + $(top_srcdir)/orchagent/fabricportsorch.cpp \ $(top_srcdir)/orchagent/copporch.cpp \ $(top_srcdir)/orchagent/tunneldecaporch.cpp \ $(top_srcdir)/orchagent/qosorch.cpp \