
Commit

Merge branch 'Azure:master' into fix/mclagsyncd-out-of-order-initialization
novikauanton authored Jan 13, 2022
2 parents a01e95d + c78aa1b commit b04979d
Showing 15 changed files with 1,085 additions and 219 deletions.
33 changes: 30 additions & 3 deletions cfgmgr/buffermgrdyn.cpp
@@ -22,7 +22,6 @@
* In internal maps: table name removed from the index
* 2. Maintain maps for pools, profiles and PGs in CONFIG_DB and APPL_DB
* 3. Keys of maps in this file don't contain the TABLE_NAME
* 3.
*/
using namespace std;
using namespace swss;
@@ -37,6 +36,7 @@ BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBC
m_zeroProfilesLoaded(false),
m_supportRemoving(true),
m_cfgDefaultLosslessBufferParam(cfgDb, CFG_DEFAULT_LOSSLESS_BUFFER_PARAMETER),
m_cfgDeviceMetaDataTable(cfgDb, CFG_DEVICE_METADATA_TABLE_NAME),
m_applBufferPoolTable(applDb, APP_BUFFER_POOL_TABLE_NAME),
m_applBufferProfileTable(applDb, APP_BUFFER_PROFILE_TABLE_NAME),
m_applBufferObjectTables({ProducerStateTable(applDb, APP_BUFFER_PG_TABLE_NAME), ProducerStateTable(applDb, APP_BUFFER_QUEUE_TABLE_NAME)}),
@@ -73,6 +73,30 @@ BufferMgrDynamic::BufferMgrDynamic(DBConnector *cfgDb, DBConnector *stateDb, DBC
string checkHeadroomPluginName = "buffer_check_headroom_" + platform + ".lua";

m_platform = platform;
m_specific_platform = platform; // default for non-Mellanox
m_model_number = 0;

// Retrieve the type of mellanox platform
if (m_platform == "mellanox")
{
m_cfgDeviceMetaDataTable.hget("localhost", "platform", m_specific_platform);
if (!m_specific_platform.empty())
{
// Mellanox model number follows "sn" in the platform name and is 4 digits long
std::size_t sn_pos = m_specific_platform.find("sn");
if (sn_pos != std::string::npos)
{
std::string model_number = m_specific_platform.substr (sn_pos + 2, 4);
if (!model_number.empty())
{
m_model_number = atoi(model_number.c_str());
}
}
}
if (!m_model_number) {
SWSS_LOG_ERROR("Failed to retrieve Mellanox model number");
}
}

try
{
Expand Down Expand Up @@ -471,7 +495,9 @@ string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string

if (m_platform == "mellanox")
{
if ((speed != "400000") && (lane_count == 8))
if ((lane_count == 8) &&
(((m_model_number / 1000 == 4) && (speed != "400000")) ||
((m_model_number / 1000 == 5) && (speed != "800000"))))
{
// On Mellanox platforms, ports with 8 lanes have a different (double) xon value than other ports
// For ports at speed other than 400G can have
@@ -482,7 +508,8 @@ string BufferMgrDynamic::getDynamicProfileName(const string &speed, const string
// Eg.
// - A 100G port with 8 lanes will use buffer profile "pg_profile_100000_5m_8lane_profile"
// - A 100G port with 4 lanes will use buffer profile "pg_profile_100000_5m_profile"
// Currently, 400G ports can only have 8 lanes. So we don't add this to the profile
// Currently, for 4xxx models, 400G ports can only have 8 lanes,
// and for 5xxx models, 800G ports can only have 8 lanes. So we don't add this to the profile.
buffer_profile_key = buffer_profile_key + "_8lane";
}
}
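For reference, the model-number parsing and the 8-lane profile-key rule added above can be read as two standalone helpers. The sketch below is illustrative only: the function names are hypothetical, and the speed/model pairing simply restates the comment in the diff (400G is the 8-lane-only speed on 4xxx models, 800G on 5xxx models).

// Illustrative sketch, not code from this commit.
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>

// Extract the 4-digit model number that follows "sn" in a Mellanox platform
// string, e.g. "x86_64-mlnx_msn3420-r0" -> 3420. Returns 0 if none is found.
unsigned int parseMellanoxModelNumber(const std::string &platform)
{
    std::size_t sn_pos = platform.find("sn");
    if (sn_pos == std::string::npos)
    {
        return 0;
    }
    std::string digits = platform.substr(sn_pos + 2, 4);
    return static_cast<unsigned int>(std::atoi(digits.c_str()));
}

// Decide whether the "_8lane" suffix is appended to the buffer profile key:
// 8-lane ports get it unless they run at the model family's 8-lane-only speed
// (400G on 4xxx models, 800G on 5xxx models).
bool needsEightLaneSuffix(unsigned int model, const std::string &speed, int lane_count)
{
    return (lane_count == 8) &&
           (((model / 1000 == 4) && (speed != "400000")) ||
            ((model / 1000 == 5) && (speed != "800000")));
}

int main()
{
    std::cout << parseMellanoxModelNumber("x86_64-mlnx_msn3420-r0") << "\n"; // 3420
    std::cout << needsEightLaneSuffix(4600, "100000", 8) << "\n";            // 1: suffix added
    std::cout << needsEightLaneSuffix(4700, "400000", 8) << "\n";            // 0: 400G is 8-lane only on 4xxx
    return 0;
}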
7 changes: 5 additions & 2 deletions cfgmgr/buffermgrdyn.h
@@ -150,7 +150,10 @@ class BufferMgrDynamic : public Orch
using Orch::doTask;

private:
std::string m_platform;
std::string m_platform; // vendor, e.g. "mellanox"
std::string m_specific_platform; // name of platform, e.g. "x86_64-mlnx_msn3420-r0"
unsigned int m_model_number; // model number extracted from specific platform, e.g. 3420

std::vector<buffer_direction_t> m_bufferDirections;
const std::string m_bufferObjectNames[BUFFER_DIR_MAX];
const std::string m_bufferDirectionNames[BUFFER_DIR_MAX];
@@ -234,7 +237,7 @@ class BufferMgrDynamic : public Orch

// Other tables
Table m_cfgDefaultLosslessBufferParam;

Table m_cfgDeviceMetaDataTable;
Table m_stateBufferMaximumTable;

Table m_applPortTable;
1 change: 1 addition & 0 deletions orchagent/Makefile.am
@@ -18,6 +18,7 @@ dist_swss_DATA = \
pfc_detect_barefoot.lua \
pfc_detect_nephos.lua \
pfc_detect_cisco-8000.lua \
pfc_detect_vs.lua \
pfc_restore.lua \
pfc_restore_cisco-8000.lua \
port_rates.lua \
1 change: 0 additions & 1 deletion orchagent/crmorch.cpp
@@ -325,7 +325,6 @@ void CrmOrch::handleSetCommand(const string& key, const vector<FieldValueTuple>&
else
{
SWSS_LOG_ERROR("Failed to parse CRM %s configuration. Unknown attribute %s.\n", key.c_str(), field.c_str());
return;
}
}
catch (const exception& e)
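The removed return changes the handler from abort-on-unknown-attribute to log-and-continue, so a single unrecognized field no longer discards the rest of the SET operation. A minimal sketch of that behaviour, with hypothetical names and a simplified field set standing in for the real CRM attributes:

// Illustrative sketch only; not the actual CrmOrch code.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using FieldValueTuple = std::pair<std::string, std::string>;

void handleSetCommand(const std::string &key, const std::vector<FieldValueTuple> &data)
{
    for (const auto &fv : data)
    {
        const std::string &field = fv.first;
        const std::string &value = fv.second;

        if (field == "polling_interval")
        {
            std::cout << "set polling_interval=" << value << "\n";
        }
        else
        {
            // Previously the handler returned here, dropping the remaining
            // fields of the same SET; now it only reports the unknown one.
            std::cerr << "Failed to parse CRM " << key
                      << " configuration. Unknown attribute " << field << ".\n";
        }
    }
}

int main()
{
    // The valid field after the bogus one is still applied.
    handleSetCommand("Config", {{"bogus_field", "1"}, {"polling_interval", "300"}});
    return 0;
}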
2 changes: 1 addition & 1 deletion orchagent/orchdaemon.cpp
@@ -448,7 +448,7 @@ bool OrchDaemon::init()
CFG_PFC_WD_TABLE_NAME
};

if (platform == MLNX_PLATFORM_SUBSTRING)
if ((platform == MLNX_PLATFORM_SUBSTRING) || (platform == VS_PLATFORM_SUBSTRING))
{

static const vector<sai_port_stat_t> portStatIds =
108 changes: 108 additions & 0 deletions orchagent/pfc_detect_vs.lua
@@ -0,0 +1,108 @@
-- KEYS - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval (milliseconds)
-- return queue Ids that satisfy criteria

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3]) * 1000

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue
local n = table.getn(KEYS)
for i = n, 1, -1 do
    local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
    local counter_num = 0
    local old_counter_num = 0
    local is_deadlock = false
    local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
    local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')

    local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
    if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
        local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
        if detection_time then
            detection_time = tonumber(detection_time)
            local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
            if not time_left then
                time_left = detection_time
            else
                time_left = tonumber(time_left)
            end

            local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
            local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
            -- If there is no entry in COUNTERS_QUEUE_INDEX_MAP or COUNTERS_QUEUE_PORT_MAP then
            -- it means KEYS[i] queue is inserted into FLEX COUNTER DB but the corresponding
            -- maps haven't been updated yet.
            if queue_index and port_id then
                local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
                local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION_US'

                -- Get all counters
                local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
                local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
                local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)
                local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key)

                if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then
                    occupancy_bytes = tonumber(occupancy_bytes)
                    packets = tonumber(packets)
                    pfc_rx_packets = tonumber(pfc_rx_packets)
                    pfc_duration = tonumber(pfc_duration)

                    local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
                    local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
                    local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
                    -- DEBUG CODE START. Uncomment to enable
                    local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
                    -- DEBUG CODE END.

                    -- If this is not a first run, then we have last values available
                    if packets_last and pfc_rx_packets_last and pfc_duration_last then
                        packets_last = tonumber(packets_last)
                        pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
                        pfc_duration_last = tonumber(pfc_duration_last)
                        local storm_condition = (pfc_duration - pfc_duration_last) > (poll_time * 0.8)

                        -- Check actual condition of queue being in PFC storm
                        if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
                            -- DEBUG CODE START. Uncomment to enable
                            (debug_storm == "enabled") or
                            -- DEBUG CODE END.
                            (occupancy_bytes == 0 and packets - packets_last == 0 and storm_condition) then
                            if time_left <= poll_time then
                                redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
                                redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
                                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
                                is_deadlock = true
                                time_left = detection_time
                            else
                                time_left = time_left - poll_time
                            end
                        else
                            if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
                                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
                            end
                            time_left = detection_time
                        end
                    end

                    -- Save values for next run
                    redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
                    redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
                    if is_deadlock == false then
                        redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
                        redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration)
                    end
                end
            end
        end
    end
end

return rets
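The heart of the script is the storm predicate evaluated once per polling interval. The C++ restatement below is only an illustration of that rule, with hypothetical type and function names; the authoritative logic is the Lua above.

// Illustrative restatement of the storm-detection rule; names are hypothetical.
#include <cstdint>
#include <iostream>

struct QueuePollSample
{
    uint64_t occupancy_bytes;   // SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES
    uint64_t packets;           // SAI_QUEUE_STAT_PACKETS
    uint64_t pfc_rx_packets;    // SAI_PORT_STAT_PFC_<q>_RX_PKTS
    uint64_t pfc_duration_us;   // SAI_PORT_STAT_PFC_<q>_RX_PAUSE_DURATION_US
};

// A queue is declared to be in a PFC storm when, over one polling interval,
// either it holds data but forwards nothing while PFC frames keep arriving,
// or it is empty, forwards nothing, and was paused for more than 80% of the interval.
bool isPfcStorm(const QueuePollSample &prev, const QueuePollSample &curr,
                uint64_t poll_time_us)
{
    const bool no_forward_progress = (curr.packets - prev.packets == 0);
    const bool pfc_frames_arriving  = (curr.pfc_rx_packets - prev.pfc_rx_packets > 0);
    const bool paused_most_of_poll  =
        (curr.pfc_duration_us - prev.pfc_duration_us) > poll_time_us * 8 / 10;

    return (curr.occupancy_bytes > 0 && no_forward_progress && pfc_frames_arriving) ||
           (curr.occupancy_bytes == 0 && no_forward_progress && paused_most_of_poll);
}

int main()
{
    QueuePollSample prev{1500, 100, 10, 0};
    QueuePollSample curr{1500, 100, 25, 0};   // packets stalled, PFC frames still arriving
    std::cout << std::boolalpha
              << isPfcStorm(prev, curr, 200000 /* 200 ms poll, in microseconds */) << "\n"; // true
    return 0;
}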
30 changes: 23 additions & 7 deletions orchagent/routeorch.cpp
@@ -376,6 +376,13 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t&
nhgm_attrs.push_back(nhgm_attr);
}

if (m_switchOrch->checkOrderedEcmpEnable())
{
nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_SEQUENCE_ID;
nhgm_attr.value.u32 = nhopgroup->second.nhopgroup_members[nexthop].seq_id;
nhgm_attrs.push_back(nhgm_attr);
}

status = sai_next_hop_group_api->create_next_hop_group_member(&nexthop_id, gSwitchId,
(uint32_t)nhgm_attrs.size(),
nhgm_attrs.data());
@@ -393,7 +400,7 @@

++count;
gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);
nhopgroup->second.nhopgroup_members[nexthop] = nexthop_id;
nhopgroup->second.nhopgroup_members[nexthop].next_hop_id = nexthop_id;
}

if (!m_fgNhgOrch->validNextHopInNextHopGroup(nexthop))
@@ -421,7 +428,7 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t
continue;
}

nexthop_id = nhopgroup->second.nhopgroup_members[nexthop];
nexthop_id = nhopgroup->second.nhopgroup_members[nexthop].next_hop_id;
status = sai_next_hop_group_api->remove_next_hop_group_member(nexthop_id);

if (status != SAI_STATUS_SUCCESS)
@@ -1241,7 +1248,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
vector<sai_attribute_t> nhg_attrs;

nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_attr.value.s32 = SAI_NEXT_HOP_GROUP_TYPE_ECMP;
nhg_attr.value.s32 = m_switchOrch->checkOrderedEcmpEnable() ? SAI_NEXT_HOP_GROUP_TYPE_DYNAMIC_ORDERED_ECMP : SAI_NEXT_HOP_GROUP_TYPE_ECMP;
nhg_attrs.push_back(nhg_attr);

sai_object_id_t next_hop_group_id;
@@ -1295,6 +1302,13 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
nhgm_attrs.push_back(nhgm_attr);
}

if (m_switchOrch->checkOrderedEcmpEnable())
{
nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_SEQUENCE_ID;
nhgm_attr.value.u32 = ((uint32_t)i) + 1; // To make non-zero sequence id
nhgm_attrs.push_back(nhgm_attr);
}

gNextHopGroupMemberBulker.create_entry(&nhgm_ids[i],
(uint32_t)nhgm_attrs.size(),
nhgm_attrs.data());
@@ -1319,7 +1333,8 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
if (nhopgroup_shared_set.find(nhid) != nhopgroup_shared_set.end())
{
auto it = nhopgroup_shared_set[nhid].begin();
next_hop_group_entry.nhopgroup_members[*it] = nhgm_id;
next_hop_group_entry.nhopgroup_members[*it].next_hop_id = nhgm_id;
next_hop_group_entry.nhopgroup_members[*it].seq_id = (uint32_t)i + 1;
nhopgroup_shared_set[nhid].erase(it);
if (nhopgroup_shared_set[nhid].empty())
{
@@ -1328,7 +1343,8 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
}
else
{
next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second] = nhgm_id;
next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second].next_hop_id = nhgm_id;
next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second].seq_id = ((uint32_t)i) + 1;
}
}

@@ -1373,12 +1389,12 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
if (m_neighOrch->isNextHopFlagSet(nhop->first, NHFLAGS_IFDOWN))
{
SWSS_LOG_WARN("NHFLAGS_IFDOWN set for next hop group member %s with next_hop_id %" PRIx64,
nhop->first.to_string().c_str(), nhop->second);
nhop->first.to_string().c_str(), nhop->second.next_hop_id);
nhop = nhgm.erase(nhop);
continue;
}

next_hop_ids.push_back(nhop->second);
next_hop_ids.push_back(nhop->second.next_hop_id);
nhop = nhgm.erase(nhop);
}

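When ordered ECMP is enabled, every group member is created with an explicit, non-zero sequence id alongside its next-hop OID. The fragment below is a schematic sketch of that attribute assembly with the SAI structures stubbed out so it stands alone; it is not the orchagent code itself.

// Schematic sketch of the sequence-id handling above. The SAI attribute
// structures are replaced by a minimal stand-in; the real code fills
// sai_attribute_t with SAI_NEXT_HOP_GROUP_MEMBER_ATTR_SEQUENCE_ID as shown
// in the diff.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct StubAttribute
{
    std::string id;     // attribute name, stand-in for the SAI enum
    uint64_t    value;  // attribute value (OID or u32)
};

std::vector<StubAttribute> buildMemberAttrs(uint64_t next_hop_oid,
                                            size_t member_index,
                                            bool ordered_ecmp_enabled)
{
    std::vector<StubAttribute> attrs;
    attrs.push_back({"NEXT_HOP_ID", next_hop_oid});

    if (ordered_ecmp_enabled)
    {
        // Sequence ids are 1-based so that 0 never appears as a valid id,
        // mirroring "((uint32_t)i) + 1" in addNextHopGroup() above.
        attrs.push_back({"SEQUENCE_ID", static_cast<uint64_t>(member_index) + 1});
    }
    return attrs;
}

int main()
{
    // Three members of one group: with ordered ECMP they get seq ids 1, 2, 3.
    for (size_t i = 0; i < 3; ++i)
    {
        auto attrs = buildMemberAttrs(0x6000000000a00ULL + i, i, true);
        std::cout << "member " << i << ": seq_id=" << attrs.back().value << "\n";
    }
    return 0;
}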
8 changes: 7 additions & 1 deletion orchagent/routeorch.h
@@ -24,7 +24,13 @@

#define LOOPBACK_PREFIX "Loopback"

typedef std::map<NextHopKey, sai_object_id_t> NextHopGroupMembers;
struct NextHopGroupMemberEntry
{
sai_object_id_t next_hop_id; // next hop sai oid
uint32_t seq_id; // Sequence Id of nexthop in the group
};

typedef std::map<NextHopKey, NextHopGroupMemberEntry> NextHopGroupMembers;

struct NhgBase;

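With the map's value type changed from a bare sai_object_id_t to the new struct, call sites read .next_hop_id (and, for ordered ECMP, .seq_id). A minimal, self-contained illustration with stand-in typedefs:

// Minimal sketch (not commit code) of how the struct-valued map is used.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

using sai_object_id_t = uint64_t;   // stand-in for the SAI typedef
using NextHopKey = std::string;     // stand-in for the orchagent NextHopKey

struct NextHopGroupMemberEntry
{
    sai_object_id_t next_hop_id;  // next hop SAI OID
    uint32_t        seq_id;       // sequence id of the next hop in the group
};

using NextHopGroupMembers = std::map<NextHopKey, NextHopGroupMemberEntry>;

int main()
{
    NextHopGroupMembers members;
    members["10.0.0.1@Ethernet0"] = {0x6000000000abcULL, 1};
    members["10.0.0.3@Ethernet4"] = {0x6000000000abdULL, 2};

    for (const auto &m : members)
    {
        std::cout << m.first << " oid=0x" << std::hex << m.second.next_hop_id
                  << std::dec << " seq=" << m.second.seq_id << "\n";
    }
    return 0;
}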