Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sai_failure_dump]Invoking dump during SAI failure #2644

Merged
merged 6 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions orchagent/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void syncd_apply_view()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
}

Expand Down Expand Up @@ -619,7 +619,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create a switch, rv:%d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId);

Expand Down Expand Up @@ -650,7 +650,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
else
{
Expand All @@ -665,7 +665,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}

gVirtualRouterId = attr.value.oid;
Expand Down Expand Up @@ -707,7 +707,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create underlay router interface %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}

SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId);
Expand Down
2 changes: 1 addition & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ void OrchDaemon::flush()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to flush redis pipeline %d", status);
abort();
handleSaiFailure(true);
}
}

Expand Down
44 changes: 37 additions & 7 deletions orchagent/saihelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
case SAI_API_HOSTIF:
Expand All @@ -514,8 +515,10 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
default:
switch (status)
{
Expand All @@ -525,7 +528,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
}
return task_need_retry;
Expand Down Expand Up @@ -566,8 +570,10 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
case SAI_API_TUNNEL:
switch (status)
{
Expand All @@ -578,12 +584,15 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}

return task_need_retry;
Expand Down Expand Up @@ -611,7 +620,8 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
return task_need_retry;
}
Expand Down Expand Up @@ -663,3 +673,23 @@ bool parseHandleSaiStatusFailure(task_process_status status)
}
return true;
}

/* Handling SAI failure. Request redis to invoke SAI failure dump and abort if set*/
void handleSaiFailure(bool abort_on_failure)
{
SWSS_LOG_ENTER();

sai_attribute_t attr;

attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to take sai failure dump %d", status);
}
if (abort_on_failure)
{
abort();
}
}
2 changes: 1 addition & 1 deletion orchagent/saihelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);

void handleSaiFailure(bool abort_on_failure);
1 change: 1 addition & 0 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ tests_SOURCES = aclorch_ut.cpp \
flowcounterrouteorch_ut.cpp \
orchdaemon_ut.cpp \
warmrestartassist_ut.cpp \
test_failure_handling.cpp \
$(top_srcdir)/lib/gearboxutils.cpp \
$(top_srcdir)/lib/subintf.cpp \
$(top_srcdir)/orchagent/orchdaemon.cpp \
Expand Down
81 changes: 81 additions & 0 deletions tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "notifier.h"
#define private public
#include "pfcactionhandler.h"
#include <sys/mman.h>
#undef private

#include <sstream>
Expand All @@ -21,6 +22,8 @@ namespace portsorch_test

sai_port_api_t ut_sai_port_api;
sai_port_api_t *pold_sai_port_api;
sai_switch_api_t ut_sai_switch_api;
sai_switch_api_t *pold_sai_switch_api;

bool not_support_fetching_fec;
vector<sai_port_fec_mode_t> mock_port_fec_modes = {SAI_PORT_FEC_MODE_RS, SAI_PORT_FEC_MODE_FC};
Expand Down Expand Up @@ -66,9 +69,28 @@ namespace portsorch_test
_sai_set_port_fec_count++;
_sai_port_fec_mode = attr[0].value.s32;
}
else if (attr[0].id == SAI_PORT_ATTR_AUTO_NEG_MODE)
{
/* Simulating failure case */
return SAI_STATUS_FAILURE;
}
return pold_sai_port_api->set_port_attribute(port_id, attr);
}

uint32_t *_sai_syncd_notifications_count;
int32_t *_sai_syncd_notification_event;
sai_status_t _ut_stub_sai_set_switch_attribute(
_In_ sai_object_id_t switch_id,
_In_ const sai_attribute_t *attr)
{
if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD)
{
*_sai_syncd_notifications_count =+ 1;
*_sai_syncd_notification_event = attr[0].value.s32;
}
return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
}

void _hook_sai_port_api()
{
ut_sai_port_api = *sai_port_api;
Expand All @@ -83,6 +105,19 @@ namespace portsorch_test
sai_port_api = pold_sai_port_api;
}

void _hook_sai_switch_api()
{
ut_sai_switch_api = *sai_switch_api;
pold_sai_switch_api = sai_switch_api;
ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;
sai_switch_api = &ut_sai_switch_api;
}

void _unhook_sai_switch_api()
{
sai_switch_api = pold_sai_switch_api;
}

sai_queue_api_t ut_sai_queue_api;
sai_queue_api_t *pold_sai_queue_api;
int _sai_set_queue_attr_count = 0;
Expand Down Expand Up @@ -473,6 +508,52 @@ namespace portsorch_test
_unhook_sai_port_api();
}

TEST_F(PortsOrchTest, PortTestSAIFailureHandling)
{
_hook_sai_port_api();
_hook_sai_switch_api();
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
std::deque<KeyOpFieldsValuesTuple> entries;

not_support_fetching_fec = false;
// Get SAI default ports to populate DB
auto ports = ut_helper::getInitialSaiPorts();

for (const auto &it : ports)
{
portTable.set(it.first, it.second);
}

// Set PortConfigDone
portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });

// refill consumer
gPortsOrch->addExistingData(&portTable);

// Apply configuration :
// create ports
static_cast<Orch *>(gPortsOrch)->doTask();

_sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
_sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
*_sai_syncd_notifications_count = 0;

entries.push_back({"Ethernet0", "SET",
{
{"autoneg", "on"}
}});
auto consumer = dynamic_cast<Consumer *>(gPortsOrch->getExecutor(APP_PORT_TABLE_NAME));
consumer->addToSync(entries);
ASSERT_DEATH({static_cast<Orch *>(gPortsOrch)->doTask();}, "");

ASSERT_EQ(*_sai_syncd_notifications_count, 1);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
_unhook_sai_port_api();
_unhook_sai_switch_api();
}

TEST_F(PortsOrchTest, PortReadinessColdBoot)
{
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
Expand Down
82 changes: 82 additions & 0 deletions tests/mock_tests/test_failure_handling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include "saihelper.h"
#include "ut_helper.h"
#include <sys/mman.h>

extern sai_switch_api_t *sai_switch_api;

namespace saifailure_test
{
struct SaiFailureTest : public ::testing::Test
{
};
uint32_t *_sai_syncd_notifications_count;
int32_t *_sai_syncd_notification_event;
sai_switch_api_t *pold_sai_switch_api;
sai_switch_api_t ut_sai_switch_api;

sai_status_t _ut_stub_sai_set_switch_attribute(
_In_ sai_object_id_t switch_id,
_In_ const sai_attribute_t *attr)
{
if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD)
{
*_sai_syncd_notifications_count = *_sai_syncd_notifications_count + 1;
*_sai_syncd_notification_event = attr[0].value.s32;
}
return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
}

void _hook_sai_switch_api()
{
ut_sai_switch_api = *sai_switch_api;
pold_sai_switch_api = sai_switch_api;
ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;
sai_switch_api = &ut_sai_switch_api;
}

void _unhook_sai_switch_api()
{
sai_switch_api = pold_sai_switch_api;
}

TEST_F(SaiFailureTest, handleSaiFailure)
{
_hook_sai_switch_api();
_sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
_sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
*_sai_syncd_notifications_count = 0;
uint32_t notif_count = *_sai_syncd_notifications_count;

ASSERT_DEATH({handleSaiCreateStatus(SAI_API_FDB, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiCreateStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

ASSERT_DEATH({handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_FAILURE);}, "");
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

_unhook_sai_switch_api();
}
}