Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle dual ToR neighbor miss scenario #2151

Merged
merged 8 commits into from
Aug 20, 2022
34 changes: 30 additions & 4 deletions neighsyncd/neighsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConne
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
m_cfgInterfaceTable(cfgDb, CFG_INTF_TABLE_NAME),
m_cfgLagInterfaceTable(cfgDb, CFG_LAG_INTF_TABLE_NAME),
m_cfgVlanInterfaceTable(cfgDb, CFG_VLAN_INTF_TABLE_NAME)
m_cfgVlanInterfaceTable(cfgDb, CFG_VLAN_INTF_TABLE_NAME),
m_cfgPeerSwitchTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME)
{
m_AppRestartAssist = new AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", DEFAULT_NEIGHSYNC_WARMSTART_TIMER);
if (m_AppRestartAssist)
Expand Down Expand Up @@ -108,14 +109,39 @@ void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
return;
}

std::vector<std::string> peerSwitchKeys;
bool delete_key = false;
if ((nlmsg_type == RTM_DELNEIGH) || (state == NUD_INCOMPLETE) ||
(state == NUD_FAILED))
bool use_zero_mac = false;
m_cfgPeerSwitchTable.getKeys(peerSwitchKeys);
bool is_dualtor = peerSwitchKeys.size() > 0;
if (is_dualtor && (state == NUD_INCOMPLETE || state == NUD_FAILED))
{
SWSS_LOG_INFO("Unable to resolve %s, setting zero MAC", key.c_str());
use_zero_mac = true;

// Unresolved neighbor deletion on dual ToR devices must be handled
// separately, otherwise delete_key is never set to true
// and neighorch is never able to remove the neighbor
if (nlmsg_type == RTM_DELNEIGH)
{
delete_key = true;
}
}
else if ((nlmsg_type == RTM_DELNEIGH) ||
(state == NUD_INCOMPLETE) || (state == NUD_FAILED))
{
delete_key = true;
}

nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
if (use_zero_mac)
{
std::string zero_mac = "00:00:00:00:00:00";
strncpy(macStr, zero_mac.c_str(), zero_mac.length());
}
else
{
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
}

/* Ignore neighbor entries with Broadcast Mac - Trigger for directed broadcast */
if (!delete_key && (MacAddress(macStr) == MacAddress("ff:ff:ff:ff:ff:ff")))
Expand Down
2 changes: 1 addition & 1 deletion neighsyncd/neighsync.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class NeighSync : public NetMsg
}

private:
Table m_stateNeighRestoreTable;
Table m_stateNeighRestoreTable, m_cfgPeerSwitchTable;
ProducerStateTable m_neighTable;
AppRestartAssist *m_AppRestartAssist;
Table m_cfgVlanInterfaceTable, m_cfgLagInterfaceTable, m_cfgInterfaceTable;
Expand Down
53 changes: 53 additions & 0 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ MuxCable::MuxCable(string name, IpPrefix& srv_ip4, IpPrefix& srv_ip6, IpAddress

/* Set initial state to "standby" */
stateStandby();
state_ = MuxState::MUX_STATE_STANDBY;
}

bool MuxCable::stateInitActive()
Expand Down Expand Up @@ -1068,6 +1069,37 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
return;
}

auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
// Handling zero MAC neighbor updates
if (!update.mac)
{
/* For neighbors that were previously resolvable but are now unresolvable,
* we expect such neighbor entries to be deleted prior to a zero MAC update
* arriving for that same neighbor.
*/

if (update.add)
{
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
{
createStandaloneTunnelRoute(update.entry.ip_address);
}
/* If the MAC address in the neighbor entry is zero but the neighbor IP
* is already present in standalone_tunnel_neighbors_, assume we have already
* added a tunnel route for it and exit early
*/
return;
}
}
/* If the update operation for a neighbor contains a non-zero MAC, we must
* make sure to remove any existing tunnel routes to prevent conflicts.
* This block also covers the case of neighbor deletion.
*/
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
{
removeStandaloneTunnelRoute(update.entry.ip_address);
}

for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
{
MuxCable* ptr = it->second.get();
Expand Down Expand Up @@ -1375,6 +1407,27 @@ bool MuxOrch::delOperation(const Request& request)
return true;
}

void MuxOrch::createStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Creating standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
sai_object_id_t tunnel_nexthop = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
if (tunnel_nexthop == SAI_NULL_OBJECT_ID) {
SWSS_LOG_NOTICE("%s nexthop not created yet, ignoring tunnel route creation for %s", MUX_TUNNEL, neighborIp.to_string().c_str());
return;
}
IpPrefix pfx = neighborIp.to_string();
create_route(pfx, tunnel_nexthop);
standalone_tunnel_neighbors_.insert(neighborIp);
}

void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Removing standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
IpPrefix pfx = neighborIp.to_string();
remove_route(pfx);
standalone_tunnel_neighbors_.erase(neighborIp);
}

MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
Orch2(db, tableName, request_),
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),
Expand Down
8 changes: 8 additions & 0 deletions orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,13 @@ class MuxOrch : public Orch2, public Observer, public Subject

bool getMuxPort(const MacAddress&, const string&, string&);

/***
* Methods for managing tunnel routes for neighbor IPs not associated
* with a specific mux cable
***/
void createStandaloneTunnelRoute(IpAddress neighborIp);
void removeStandaloneTunnelRoute(IpAddress neighborIp);

IpAddress mux_peer_switch_ = 0x0;
sai_object_id_t mux_tunnel_id_ = SAI_NULL_OBJECT_ID;

Expand All @@ -215,6 +222,7 @@ class MuxOrch : public Orch2, public Observer, public Subject
FdbOrch *fdb_orch_;

MuxCfgRequest request_;
std::set<IpAddress> standalone_tunnel_neighbors_;
};

const request_description_t mux_cable_request_description = {
Expand Down
20 changes: 19 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,16 @@ void NeighOrch::doTask(Consumer &consumer)
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
{
if (addNeighbor(neighbor_entry, mac_address))
// only for unresolvable neighbors that are new
if (!mac_address)
{
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
addZeroMacTunnelRoute(neighbor_entry, mac_address);
}
it = consumer.m_toSync.erase(it);
}
else if (addNeighbor(neighbor_entry, mac_address))
{
it = consumer.m_toSync.erase(it);
}
Expand Down Expand Up @@ -1716,3 +1725,12 @@ void NeighOrch::updateSrv6Nexthop(const NextHopKey &nh, const sai_object_id_t &n
m_syncdNextHops.erase(nh);
}
}
void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
{
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
NeighborUpdate update = {entry, mac, true};
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
m_syncdNeighbors[entry] = { mac, false };
}

2 changes: 2 additions & 0 deletions orchagent/neighorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ class NeighOrch : public Orch, public Subject, public Observer

bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
void clearResolvedNeighborEntry(const NeighborEntry &);

void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
};

#endif /* SWSS_NEIGHORCH_H */
22 changes: 13 additions & 9 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1794,15 +1794,15 @@ def update_dvs(log_path, new_dvs_env=[]):
dvs.runcmd("mv /etc/sonic/config_db.json.orig /etc/sonic/config_db.json")
dvs.ctn_restart()

@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dvs(request, manage_dvs) -> DockerVirtualSwitch:
dvs_env = getattr(request.module, "DVS_ENV", [])
name = request.config.getoption("--dvsname")
log_path = name if name else request.module.__name__

return manage_dvs(log_path, dvs_env)

@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def vct(request):
vctns = request.config.getoption("--vctns")
topo = request.config.getoption("--topo")
Expand All @@ -1821,7 +1821,8 @@ def vct(request):
vct.get_logs(request.module.__name__)
vct.destroy()

@pytest.yield_fixture

@pytest.fixture
def testlog(request, dvs):
dvs.runcmd(f"logger -t pytest === start test {request.node.nodeid} ===")
yield testlog
Expand Down Expand Up @@ -1850,26 +1851,28 @@ def dvs_route(request, dvs) -> DVSRoute:

# FIXME: The rest of these also need to be reverted back to normal fixtures to
# appease the linter.
@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_lag_manager(request, dvs):
request.cls.dvs_lag = dvs_lag.DVSLag(dvs.get_asic_db(),
dvs.get_config_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_vlan_manager(request, dvs):
request.cls.dvs_vlan = dvs_vlan.DVSVlan(dvs.get_asic_db(),
dvs.get_config_db(),
dvs.get_state_db(),
dvs.get_counters_db(),
dvs.get_app_db())

@pytest.yield_fixture(scope="class")

@pytest.fixture(scope="class")
def dvs_port_manager(request, dvs):
request.cls.dvs_port = dvs_port.DVSPort(dvs.get_asic_db(),
dvs.get_config_db())

@pytest.yield_fixture(scope="class")

@pytest.fixture(scope="class")
def dvs_mirror_manager(request, dvs):
request.cls.dvs_mirror = dvs_mirror.DVSMirror(dvs.get_asic_db(),
dvs.get_config_db(),
Expand All @@ -1878,7 +1881,7 @@ def dvs_mirror_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_policer_manager(request, dvs):
request.cls.dvs_policer = dvs_policer.DVSPolicer(dvs.get_asic_db(),
dvs.get_config_db())
Expand All @@ -1896,7 +1899,8 @@ def remove_dpb_config_file(dvs):
cmd = "mv /etc/sonic/config_db.json.bak /etc/sonic/config_db.json"
dvs.runcmd(cmd)

@pytest.yield_fixture(scope="module")

@pytest.fixture(scope="module")
def dpb_setup_fixture(dvs):
create_dpb_config_file(dvs)
if dvs.vct is None:
Expand Down
Loading