Skip to content

Commit

Permalink
[sai_failure_dump]Invoking dump during SAI failure (#1198)
Browse files Browse the repository at this point in the history
* [sai_failure_dump]Invoking dump during SAI failure
  • Loading branch information
dgsudharsan committed Feb 2, 2023
1 parent 402eb14 commit 0434b62
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 7 deletions.
7 changes: 7 additions & 0 deletions lib/RedisRemoteSaiInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2077,6 +2077,7 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(
case SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW:
case SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW:
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
break;

default:
Expand Down Expand Up @@ -2118,6 +2119,12 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(

break;

case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:

SWSS_LOG_NOTICE("invoked DUMP succeeded");

break;

default:
break;
}
Expand Down
4 changes: 2 additions & 2 deletions lib/sairedis.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ typedef enum _sai_redis_notify_syncd_t

SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW,

SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC
SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC,

SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP
} sai_redis_notify_syncd_t;

typedef enum _sai_redis_communication_mode_t
Expand Down Expand Up @@ -248,5 +249,4 @@ typedef enum _sai_redis_switch_attr_t
* @default 60000
*/
SAI_REDIS_SWITCH_ATTR_SYNC_OPERATION_RESPONSE_TIMEOUT,

} sai_redis_switch_attr_t;
1 change: 1 addition & 0 deletions lib/sairediscommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
/*
 * String names of notify-syncd operations. SYNCD_INSPECT_ASIC and
 * SYNCD_INVOKE_DUMP are the values that sai_serialize/sai_deserialize map to
 * and from SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC and
 * SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP; the two *_VIEW names presumably pair
 * with the corresponding enum values in the same way (TODO confirm).
 *
 * NOTE(review): the *_VIEW strings carry no "SYNCD_" prefix while the other
 * two do; the literals are matched verbatim, so keep them exactly as they are.
 */
#define SYNCD_INIT_VIEW "INIT_VIEW"
#define SYNCD_APPLY_VIEW "APPLY_VIEW"
#define SYNCD_INSPECT_ASIC "SYNCD_INSPECT_ASIC"
#define SYNCD_INVOKE_DUMP "SYNCD_INVOKE_DUMP"

#define ASIC_STATE_TABLE "ASIC_STATE"
#define TEMP_PREFIX "TEMP_"
Expand Down
7 changes: 7 additions & 0 deletions meta/SaiSerialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2359,6 +2359,9 @@ std::string sai_serialize(
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
return SYNCD_INSPECT_ASIC;

case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
return SYNCD_INVOKE_DUMP;

default:

SWSS_LOG_THROW("unknown value on sai_redis_notify_syncd_t: %d", value);
Expand Down Expand Up @@ -4543,6 +4546,10 @@ void sai_deserialize(
{
value = SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC;
}
else if (s == SYNCD_INVOKE_DUMP)
{
value = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
}
else
{
SWSS_LOG_THROW("enum %s not found in sai_redis_notify_syncd_t", s.c_str());
Expand Down
19 changes: 17 additions & 2 deletions syncd/Syncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "swss/select.h"
#include "swss/tokenize.h"
#include "swss/notificationproducer.h"
#include "swss/exec.h"

#include "meta/sai_serialize.h"
#include "meta/ZeroMQSelectableChannel.h"
Expand All @@ -34,6 +35,7 @@
#include <algorithm>

#define DEF_SAI_WARM_BOOT_DATA_FILE "/var/warmboot/sai-warmboot.bin"
/*
 * Script that processNotifySyncd runs through swss::exec when it receives
 * SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP; a non-zero exit code is reported back
 * to the caller as SAI_STATUS_FAILURE.
 */
#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"

using namespace syncd;
using namespace saimeta;
Expand Down Expand Up @@ -3247,6 +3249,7 @@ sai_status_t Syncd::processNotifySyncd(
SWSS_LOG_ENTER();

auto& key = kfvKey(kco);
sai_status_t status = SAI_STATUS_SUCCESS;

if (!m_commandLineOptions->m_enableTempView)
{
Expand All @@ -3259,6 +3262,20 @@ sai_status_t Syncd::processNotifySyncd(

auto redisNotifySyncd = sai_deserialize_redis_notify_syncd(key);

if (redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP)
{
SWSS_LOG_NOTICE("Invoking SAI failure dump");
std::string ret_str;
int ret = swss::exec(SAI_FAILURE_DUMP_SCRIPT, ret_str);
if (ret != 0)
{
SWSS_LOG_ERROR("Error in executing SAI failure dump %s", ret_str.c_str());
status = SAI_STATUS_FAILURE;
}
sendNotifyResponse(status);
return status;
}

if (m_veryFirstRun && m_firstInitWasPerformed && redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW)
{
/*
Expand All @@ -3273,7 +3290,6 @@ sai_status_t Syncd::processNotifySyncd(
{
SWSS_LOG_NOTICE("very first run is TRUE, op = %s", key.c_str());

sai_status_t status = SAI_STATUS_SUCCESS;

/*
* On the very first start of syncd, "compile" view is directly applied
Expand Down Expand Up @@ -3348,7 +3364,6 @@ sai_status_t Syncd::processNotifySyncd(

SWSS_LOG_WARN("syncd received APPLY VIEW, will translate");

sai_status_t status;

try
{
Expand Down
22 changes: 22 additions & 0 deletions syncd/scripts/sai_failure_dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Script for sai failure dump
#
# Sources the optional platform specific dump hook and then rotates the dump
# directory so that only the newest SAI_MAX_FAILURE_DUMPS entries are kept.
#

SAI_MAX_FAILURE_DUMPS=10

DUMPDIR=/var/log/sai_failure_dump

PLATFORM_DUMP_SCRIPT=/usr/bin/platform_syncd_dump.sh

# Source the platform specific dump file, if the platform ships one.
# The path that is sourced must be the same absolute path that is tested;
# a relative "./usr/bin/..." would only resolve when the current working
# directory happens to be "/".
if [ -f "$PLATFORM_DUMP_SCRIPT" ]; then
    . "$PLATFORM_DUMP_SCRIPT"
fi

# Nothing to rotate when the dump directory has no entries
if [ -z "$(ls -A "$DUMPDIR"/)" ]; then
    exit 0
fi

# Perform rotation: list entries newest first, skip the first
# SAI_MAX_FAILURE_DUMPS of them and remove everything that is older.

ls -1td "$DUMPDIR"/* | tail -n +$((SAI_MAX_FAILURE_DUMPS + 1)) | xargs rm -rf
3 changes: 3 additions & 0 deletions syncd/scripts/syncd_init_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ if [[ "$(cat /proc/cmdline)" != *"SONIC_BOOT_TYPE=fast-reboot"* ]]; then
CMD_ARGS+=" -u"
fi

# Create a folder for SAI failure dump files
mkdir -p /var/log/sai_failure_dump/

# Use bulk APIs in SAI
# currently disabled since most vendors don't support that yet
# CMD_ARGS+=" -l"
Expand Down
49 changes: 46 additions & 3 deletions syncd/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,26 @@ using namespace syncd;
if ((status)!=SAI_STATUS_SUCCESS) \
SWSS_LOG_THROW(format ": %s", ##__VA_ARGS__, sai_serialize_status(status).c_str());

/*
 * Command that test_invoke_dump expects the mocked swss::exec to have been
 * called with.
 * NOTE(review): duplicates the definition in syncd/Syncd.cpp - the two must
 * be kept in sync by hand.
 */
#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"

/*
 * Terminate the test process with exit code 1 unless the status is
 * SAI_STATUS_SUCCESS.
 *
 * NOTE(review): the macro argument "x" is never used - the expansion always
 * tests a variable literally named "status" in the calling scope, so
 * CHECK_STATUS(other) would silently check the wrong value. The expansion is
 * also a bare "if", so avoid placing it directly in front of an "else".
 */
#define CHECK_STATUS(x) \
if (status != SAI_STATUS_SUCCESS) { exit(1); }


using namespace saimeta;

/*
 * Command string most recently handed to the mocked swss::exec below.
 */
std::string mockCallArg;

namespace swss
{
    /*
     * Test replacement for swss::exec: no command is run. The requested
     * command is remembered in mockCallArg, the stdout output argument is
     * left untouched and 0 is always returned.
     */
    int exec(const std::string &cmd, std::string &stdout)
    {
        SWSS_LOG_ENTER();

        mockCallArg.assign(cmd);

        return 0;
    }
}

static std::shared_ptr<swss::DBConnector> g_db1;

static sai_next_hop_group_api_t test_next_hop_group_api;
Expand Down Expand Up @@ -652,9 +671,6 @@ void test_bulk_route_set()
ASSERT_SUCCESS("Failed to bulk remove route entry");
}

/*
 * Terminate the test process with exit code 1 unless the status is
 * SAI_STATUS_SUCCESS.
 *
 * NOTE(review): the macro argument "x" is never used - the expansion always
 * tests a variable literally named "status" in the calling scope, so
 * CHECK_STATUS(other) would silently check the wrong value. The expansion is
 * also a bare "if", so avoid placing it directly in front of an "else".
 */
#define CHECK_STATUS(x) \
if (status != SAI_STATUS_SUCCESS) { exit(1); }

void syncdThread()
{
SWSS_LOG_ENTER();
Expand All @@ -678,6 +694,31 @@ void syncdThread()
syncd->run();
}

void test_invoke_dump()
{
SWSS_LOG_ENTER();
clearDB();

auto syncd = std::make_shared<std::thread>(syncdThread);
syncd->detach();

sai_attribute_t attr;
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;

auto sairedis = std::make_shared<sairedis::Sai>();

sai_status_t status = sairedis->initialize(0, &test_services);

CHECK_STATUS(status);

status = sairedis->set(SAI_OBJECT_TYPE_SWITCH, SAI_NULL_OBJECT_ID, &attr);

ASSERT_SUCCESS("Failed to invoke dump");
assert(mockCallArg == SAI_FAILURE_DUMP_SCRIPT);
}


void test_bulk_route_create()
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -869,6 +910,8 @@ int main()
printf("\n[ %s ]\n\n", sai_serialize_status(SAI_STATUS_SUCCESS).c_str());

test_watchdog_timer_clock_rollback();

test_invoke_dump();
}
catch (const std::exception &e)
{
Expand Down

0 comments on commit 0434b62

Please sign in to comment.