Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sai_failure_dump]Invoking dump during SAI failure #1198

Merged
merged 3 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/RedisRemoteSaiInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2077,6 +2077,7 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(
case SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW:
case SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW:
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
break;

default:
Expand Down Expand Up @@ -2118,6 +2119,12 @@ sai_status_t RedisRemoteSaiInterface::sai_redis_notify_syncd(

break;

case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:

SWSS_LOG_NOTICE("invoked DUMP succeeded");

break;

default:
break;
}
Expand Down
4 changes: 2 additions & 2 deletions lib/sairedis.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ typedef enum _sai_redis_notify_syncd_t

SAI_REDIS_NOTIFY_SYNCD_APPLY_VIEW,

SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC
SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC,

SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP
} sai_redis_notify_syncd_t;

typedef enum _sai_redis_communication_mode_t
Expand Down Expand Up @@ -248,5 +249,4 @@ typedef enum _sai_redis_switch_attr_t
* @default 60000
*/
SAI_REDIS_SWITCH_ATTR_SYNC_OPERATION_RESPONSE_TIMEOUT,

} sai_redis_switch_attr_t;
1 change: 1 addition & 0 deletions lib/sairediscommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define SYNCD_INIT_VIEW "INIT_VIEW"
#define SYNCD_APPLY_VIEW "APPLY_VIEW"
#define SYNCD_INSPECT_ASIC "SYNCD_INSPECT_ASIC"
#define SYNCD_INVOKE_DUMP "SYNCD_INVOKE_DUMP"

#define ASIC_STATE_TABLE "ASIC_STATE"
#define TEMP_PREFIX "TEMP_"
Expand Down
7 changes: 7 additions & 0 deletions meta/SaiSerialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2359,6 +2359,9 @@ std::string sai_serialize(
case SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC:
return SYNCD_INSPECT_ASIC;

case SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP:
return SYNCD_INVOKE_DUMP;

default:

SWSS_LOG_THROW("unknown value on sai_redis_notify_syncd_t: %d", value);
Expand Down Expand Up @@ -4543,6 +4546,10 @@ void sai_deserialize(
{
value = SAI_REDIS_NOTIFY_SYNCD_INSPECT_ASIC;
}
else if (s == SYNCD_INVOKE_DUMP)
{
value = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
}
else
{
SWSS_LOG_THROW("enum %s not found in sai_redis_notify_syncd_t", s.c_str());
Expand Down
19 changes: 17 additions & 2 deletions syncd/Syncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "swss/select.h"
#include "swss/tokenize.h"
#include "swss/notificationproducer.h"
#include "swss/exec.h"

#include "meta/sai_serialize.h"
#include "meta/ZeroMQSelectableChannel.h"
Expand All @@ -34,6 +35,7 @@
#include <algorithm>

#define DEF_SAI_WARM_BOOT_DATA_FILE "/var/warmboot/sai-warmboot.bin"
#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"

using namespace syncd;
using namespace saimeta;
Expand Down Expand Up @@ -3247,6 +3249,7 @@ sai_status_t Syncd::processNotifySyncd(
SWSS_LOG_ENTER();

auto& key = kfvKey(kco);
sai_status_t status = SAI_STATUS_SUCCESS;

if (!m_commandLineOptions->m_enableTempView)
{
Expand All @@ -3259,6 +3262,20 @@ sai_status_t Syncd::processNotifySyncd(

auto redisNotifySyncd = sai_deserialize_redis_notify_syncd(key);

if (redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP)
{
SWSS_LOG_NOTICE("Invoking SAI failure dump");
std::string ret_str;
int ret = swss::exec(SAI_FAILURE_DUMP_SCRIPT, ret_str);
if (ret != 0)
{
SWSS_LOG_ERROR("Error in executing SAI failure dump %s", ret_str.c_str());
status = SAI_STATUS_FAILURE;
}
sendNotifyResponse(status);
return status;
}

if (m_veryFirstRun && m_firstInitWasPerformed && redisNotifySyncd == SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW)
{
/*
Expand All @@ -3273,7 +3290,6 @@ sai_status_t Syncd::processNotifySyncd(
{
SWSS_LOG_NOTICE("very first run is TRUE, op = %s", key.c_str());

sai_status_t status = SAI_STATUS_SUCCESS;
dgsudharsan marked this conversation as resolved.
Show resolved Hide resolved

/*
* On the very first start of syncd, "compile" view is directly applied
Expand Down Expand Up @@ -3348,7 +3364,6 @@ sai_status_t Syncd::processNotifySyncd(

SWSS_LOG_WARN("syncd received APPLY VIEW, will translate");

sai_status_t status;

try
{
Expand Down
22 changes: 22 additions & 0 deletions syncd/scripts/sai_failure_dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Script for sai failure dump
#
# Sources the optional platform specific dump script and then rotates the
# entries of the dump directory, keeping only the SAI_MAX_FAILURE_DUMPS most
# recently modified ones.
#

# Maximum number of entries kept in the dump directory after rotation
SAI_MAX_FAILURE_DUMPS=10

DUMPDIR=/var/log/sai_failure_dump

# Source the platform specific dump file.
# The absolute path is used on purpose: it is the same path the existence
# test checks, whereas a "./usr/bin/..." path is resolved against the current
# working directory and is only found when the script runs from "/".
if [ -f /usr/bin/platform_syncd_dump.sh ]; then
    . /usr/bin/platform_syncd_dump.sh
fi

# Nothing to rotate when the dump directory has no entries
if [ -z "$(ls -A "$DUMPDIR"/)" ]; then
    exit 0
fi

# Perform rotation
#
# Entries are listed newest first (ls -t); everything after the first
# SAI_MAX_FAILURE_DUMPS entries is removed.
# NOTE: xargs splits its input on whitespace, so entry names are assumed
# not to contain blanks.

ls -1td "$DUMPDIR"/* | tail -n +$((SAI_MAX_FAILURE_DUMPS + 1)) | xargs rm -rf
3 changes: 3 additions & 0 deletions syncd/scripts/syncd_init_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ if [[ "$(cat /proc/cmdline)" != *"SONIC_BOOT_TYPE=fast-reboot"* ]]; then
CMD_ARGS+=" -u"
fi

# Create a folder for SAI failure dump files
mkdir -p /var/log/sai_failure_dump/

# Use bulk APIs in SAI
# currently disabled since most vendors don't support that yet
# CMD_ARGS+=" -l"
Expand Down
49 changes: 46 additions & 3 deletions syncd/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,26 @@ using namespace syncd;
if ((status)!=SAI_STATUS_SUCCESS) \
SWSS_LOG_THROW(format ": %s", ##__VA_ARGS__, sai_serialize_status(status).c_str());

#define SAI_FAILURE_DUMP_SCRIPT "/usr/bin/sai_failure_dump.sh"

/*
 * Terminate the test process with exit code 1 when the given status is not
 * SAI_STATUS_SUCCESS.
 *
 * The macro argument itself is checked, so the macro works for any status
 * expression and not only for a local variable that happens to be named
 * "status".
 */
#define CHECK_STATUS(x) \
    if ((x) != SAI_STATUS_SUCCESS) { exit(1); }


using namespace saimeta;

std::string mockCallArg;

namespace swss {

    /*
     * Test replacement for swss::exec.
     *
     * No command is executed: the requested command line is stored in
     * mockCallArg so a test can inspect it afterwards, and success (0) is
     * always reported. The output string is left untouched.
     *
     * The second parameter is deliberately not named "stdout", since that
     * identifier is a macro defined by <cstdio>.
     */
    int exec(const std::string &cmd, std::string &output)
    {
        SWSS_LOG_ENTER();

        (void)output; // the mock produces no command output

        mockCallArg = cmd;

        return 0;
    }
}

static std::shared_ptr<swss::DBConnector> g_db1;

static sai_next_hop_group_api_t test_next_hop_group_api;
Expand Down Expand Up @@ -652,9 +671,6 @@ void test_bulk_route_set()
ASSERT_SUCCESS("Failed to bulk remove route entry");
}

/*
 * Terminate the test process with exit code 1 when the given status is not
 * SAI_STATUS_SUCCESS.
 *
 * The macro argument itself is checked, so the macro works for any status
 * expression and not only for a local variable that happens to be named
 * "status".
 */
#define CHECK_STATUS(x) \
    if ((x) != SAI_STATUS_SUCCESS) { exit(1); }

void syncdThread()
{
SWSS_LOG_ENTER();
Expand All @@ -678,6 +694,31 @@ void syncdThread()
syncd->run();
}

void test_invoke_dump()
{
SWSS_LOG_ENTER();
clearDB();

auto syncd = std::make_shared<std::thread>(syncdThread);
syncd->detach();

sai_attribute_t attr;
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;

auto sairedis = std::make_shared<sairedis::Sai>();

sai_status_t status = sairedis->initialize(0, &test_services);

CHECK_STATUS(status);

status = sairedis->set(SAI_OBJECT_TYPE_SWITCH, SAI_NULL_OBJECT_ID, &attr);

ASSERT_SUCCESS("Failed to invoke dump");
assert(mockCallArg == SAI_FAILURE_DUMP_SCRIPT);
}


void test_bulk_route_create()
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -869,6 +910,8 @@ int main()
printf("\n[ %s ]\n\n", sai_serialize_status(SAI_STATUS_SUCCESS).c_str());

test_watchdog_timer_clock_rollback();

test_invoke_dump();
}
catch (const std::exception &e)
{
Expand Down