Skip to content

Commit

Permalink
[Mellanox|FFB]: Add support for Mellanox fast-fast boot (#2294)
Browse files Browse the repository at this point in the history
* [mlnx|ffb] Add support for mellanox fast-fast boot

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>

* [mlnx|ffb]: Add support of "config end" event for mlnx fast-fast boot

Signed-off-by: Volodymyr Samotiy <volodymyrs@mellanox.com>

* [Mellanox|FFB]: Fix review comments

* Change naming convention from "fast-fast" to "fastfast"

Signed-off-by: Volodymyr Samotiy <volodymyrs@mellanox.com>
  • Loading branch information
Volodymyr Samotiy authored and lguohan committed Dec 4, 2018
1 parent 1d655db commit 75b4123
Show file tree
Hide file tree
Showing 16 changed files with 394 additions and 14 deletions.
22 changes: 12 additions & 10 deletions files/build_templates/docker_image_ctl.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,29 @@ function getMountPoint()

# Print the boot type of the running system to stdout.
#
# The type is taken from the SONIC_BOOT_TYPE=<value> token of the kernel
# command line (/proc/cmdline) and normalized to one of:
#   warm | fastfast | fast | cold
# "cold" is printed when the token is absent or has any other value.
function getBootType()
{
    # Keep the result local so it does not leak into the caller's scope
    local TYPE
    case "$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)" in
    warm*)
        TYPE='warm'
        ;;
    # "fastfast" must be matched before the more general "fast*" pattern
    fastfast)
        TYPE='fastfast'
        ;;
    fast*)
        TYPE='fast'
        ;;
    *)
        TYPE='cold'
    esac
    echo "${TYPE}"
}

function preStartAction()
{
{%- if docker_container_name == "database" %}
WARM_DIR=/host/warmboot
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
# Load redis content from /host/warm-reboot/dump.rdb
# Load redis content from /host/warmboot/dump.rdb
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
else
# Create an empty file and overwrite any RDB if already there
Expand All @@ -46,7 +49,6 @@ function postStartAction()
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
sleep 1;
done

if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
rm -f $WARM_DIR/dump.rdb
else
Expand All @@ -59,7 +61,7 @@ function postStartAction()
fi
{%- elif docker_container_name == "swss" %}
docker exec swss rm -f /ready # remove cruft
if [[ "$BOOT_TYPE" == "fast" && -d /host/fast-reboot ]]; then
if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/
Expand Down
2 changes: 2 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,9 @@ sudo cp {{src}} $FILESYSTEM_ROOT/{{dst}}
{% if sonic_asic_platform == "mellanox" %}
sudo mkdir -p $FILESYSTEM_ROOT/etc/mlnx/
sudo cp target/files/$MLNX_FW_FILE $FILESYSTEM_ROOT/etc/mlnx/fw-SPC.mfa
sudo cp target/files/$MLNX_FFB_SCRIPT $FILESYSTEM_ROOT/usr/bin/mlnx-ffb.sh
j2 platform/mellanox/mlnx-fw-upgrade.j2 | sudo tee $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
j2 platform/mellanox/sdk-version.j2 | sudo tee $FILESYSTEM_ROOT/etc/mlnx/sdk-version
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
{% endif %}

Expand Down
6 changes: 5 additions & 1 deletion files/scripts/swss.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ start() {

# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
# Don't flush APP_DB during MLNX fastfast boot
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
fi
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"
Expand Down
3 changes: 2 additions & 1 deletion platform/mellanox/docker-syncd-mlnx-rpc.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

DOCKER_SYNCD_MLNX_RPC = docker-syncd-mlnx-rpc.gz
$(DOCKER_SYNCD_MLNX_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mlnx-rpc
$(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) $(MLNX_SFPD)
$(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) $(MLNX_SFPD) $(MLNX_ISSU)
$(DOCKER_SYNCD_MLNX_RPC)_LOAD_DOCKERS += $(DOCKER_SYNCD_MLNX)
SONIC_DOCKER_IMAGES += $(DOCKER_SYNCD_MLNX_RPC)
ifeq ($(ENABLE_SYNCD_RPC),y)
Expand All @@ -13,3 +13,4 @@ $(DOCKER_SYNCD_MLNX_RPC)_CONTAINER_NAME = syncd
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += --net=host --privileged -t
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /host/machine.conf:/etc/machine.conf
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /host/warmboot:/var/warmboot
3 changes: 2 additions & 1 deletion platform/mellanox/docker-syncd-mlnx.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

DOCKER_SYNCD_MLNX = docker-syncd-mlnx.gz
$(DOCKER_SYNCD_MLNX)_PATH = $(PLATFORM_PATH)/docker-syncd-mlnx
$(DOCKER_SYNCD_MLNX)_DEPENDS += $(SYNCD) $(PYTHON_SDK_API) $(MLNX_SFPD) $(CRIU)
$(DOCKER_SYNCD_MLNX)_DEPENDS += $(SYNCD) $(PYTHON_SDK_API) $(MLNX_SFPD) $(CRIU) $(MLNX_ISSU)
$(DOCKER_SYNCD_MLNX)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE)
SONIC_DOCKER_IMAGES += $(DOCKER_SYNCD_MLNX)
ifneq ($(ENABLE_SYNCD_RPC),y)
Expand All @@ -13,4 +13,5 @@ $(DOCKER_SYNCD_MLNX)_CONTAINER_NAME = syncd
$(DOCKER_SYNCD_MLNX)_RUN_OPT += --net=host --privileged -t
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/machine.conf:/etc/machine.conf
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/warmboot:/var/warmboot
$(DOCKER_SYNCD_MLNX)_RUN_OPT += --tmpfs /run/criu
6 changes: 6 additions & 0 deletions platform/mellanox/docker-syncd-mlnx/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ supervisorctl start rsyslogd
supervisorctl start syncd

supervisorctl start mlnx-sfpd

BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then
rm -f /var/warmboot/issu_started
/usr/bin/ffb &>/dev/null &
fi
7 changes: 7 additions & 0 deletions platform/mellanox/mlnx-ffb.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Mellanox fastfast boot (FFB) helper script.
#
# Declares mlnx-ffb.sh, located under platform/mellanox/, and appends it to
# SONIC_COPY_FILES (presumably the list of files the build copies into its
# output directory -- confirm against the top-level build rules).

MLNX_FFB_SCRIPT = mlnx-ffb.sh
$(MLNX_FFB_SCRIPT)_PATH = platform/mellanox/
SONIC_COPY_FILES += $(MLNX_FFB_SCRIPT)

# Export the file name so commands run by make can read $MLNX_FFB_SCRIPT
# from their environment.
export MLNX_FFB_SCRIPT
87 changes: 87 additions & 0 deletions platform/mellanox/mlnx-ffb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/bin/bash

# Return codes used by the functions in this script
# (0 = success, non-zero = failure, following the shell convention).
FFB_SUCCESS=0
FFB_FAILURE=1

# Check if ISSU is enabled on this device.
#
# Runs "show platform mlnx issu" and returns FFB_SUCCESS when its output
# contains the substring "enabled", FFB_FAILURE otherwise.
check_issu_enabled()
{
    ISSU_CHECK_CMD="show platform mlnx issu"

    # Assume failure until the status output says otherwise
    CHECK_RESULT="${FFB_FAILURE}"

    # The command string is expanded unquoted on purpose so that it is
    # word-split into the command name and its arguments.
    case "$($ISSU_CHECK_CMD)" in
        *enabled*)
            # ISSU enabled, report success
            CHECK_RESULT="${FFB_SUCCESS}"
            ;;
    esac

    return "${CHECK_RESULT}"
}

# Check if ISSU upgrade from current SDK to next image SDK is supported.
#
# Returns FFB_SUCCESS when the next boot image is the same as the current
# one, or when "issu --check <next SDK version>" succeeds inside the syncd
# container. Returns FFB_FAILURE otherwise, including when the next image's
# squashfs cannot be mounted or carries no /etc/mlnx/sdk-version file.
check_sdk_upgrade()
{
    CHECK_RESULT="${FFB_FAILURE}"

    NEXT_SONIC_IMAGE="$(sonic_installer list | grep "Next: " | cut -f2 -d' ')"
    CURRENT_SONIC_IMAGE="$(sonic_installer list | grep "Current: " | cut -f2 -d' ')"

    FS_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}/fs.squashfs"
    FS_MOUNTPOINT="/tmp/image-${NEXT_SONIC_IMAGE#SONiC-OS-}-fs"

    # Rebooting into the same image: the SDK does not change, nothing to check
    if [[ "${CURRENT_SONIC_IMAGE}" == "${NEXT_SONIC_IMAGE}" ]]; then
        return "${FFB_SUCCESS}"
    fi

    # Single-pass loop: "break" is used to jump to the cleanup code below
    while :; do
        mkdir -p "${FS_MOUNTPOINT}"
        mount -t squashfs "${FS_PATH}" "${FS_MOUNTPOINT}" || {
            >&2 echo "Failed to mount next SONiC image"
            break;
        }

        SDK_VERSION_FILE_PATH="${FS_MOUNTPOINT}/etc/mlnx/sdk-version"

        if [ ! -f "${SDK_VERSION_FILE_PATH}" ]; then
            >&2 echo "No SDK version file ${SDK_VERSION_FILE_PATH}"
            break
        fi
        NEXT_SDK_VERSION="$(cat "${SDK_VERSION_FILE_PATH}")"

        ISSU_CHECK_CMD="docker exec -t syncd issu --check ${NEXT_SDK_VERSION}"

        # Expanded unquoted on purpose so the string is split into argv.
        # The variable name must match the assignment above: an unset name
        # would expand to nothing and the bare redirection would "succeed".
        ${ISSU_CHECK_CMD} > /dev/null && CHECK_RESULT="${FFB_SUCCESS}"

        break
    done

    # Best-effort cleanup; failures here must not change the check result
    umount -rf "${FS_MOUNTPOINT}" 2> /dev/null || true
    rm -rf "${FS_MOUNTPOINT}" 2> /dev/null || true

    return "${CHECK_RESULT}"
}

# Perform ISSU start.
#
# Runs "issu --start" inside the syncd container, then creates the
# /host/warmboot/issu_started marker file regardless of the outcome.
# Returns the exit code of the "issu --start" command.
issu_start()
{
    ISSU_START_CMD="docker exec -t syncd issu --start"

    # Capture the status right away, before touch overwrites $?
    ${ISSU_START_CMD} > /dev/null; EXIT_CODE=$?

    touch /host/warmboot/issu_started

    return "${EXIT_CODE}"
}

# Perform ISSU end.
#
# Runs "issu --end" inside the syncd container and returns its exit code.
issu_end()
{
    ISSU_END_CMD="docker exec -t syncd issu --end"

    ${ISSU_END_CMD} > /dev/null; EXIT_CODE=$?

    return "${EXIT_CODE}"
}
5 changes: 5 additions & 0 deletions platform/mellanox/mlnx-issu.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# issu (SONiC MLNX platform ISSU tool) Debian package.
#
# Declares the python-mlnx-issu package, whose sources live under
# $(PLATFORM_PATH)/mlnx-issu, and appends it to SONIC_PYTHON_STDEB_DEBS
# (presumably the list of Python packages built into .deb files via stdeb --
# confirm against the top-level build rules).

MLNX_ISSU = python-mlnx-issu_1.0-1_all.deb
$(MLNX_ISSU)_SRC_PATH = $(PLATFORM_PATH)/mlnx-issu
SONIC_PYTHON_STDEB_DEBS += $(MLNX_ISSU)
69 changes: 69 additions & 0 deletions platform/mellanox/mlnx-issu/scripts/ffb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python
"""
Part of Mellanox platform specific fastfast boot implementation for warm-boot.
Notifies the SYNCD process once boot is finished after warm-reboot.
Once SYNCD receives such a notification it should set the appropriate SAI attribute.
Then SAI will notify SDK to end ISSU mode for the FFB.
"""


import time
import swsssdk
from threading import Timer


class FFB(object):
    """Signal the end of a Mellanox fastfast boot.

    Subscribes to keyspace events for ``WARM_RESTART_TABLE|bgp`` in STATE_DB.
    When the ``state`` field changes to ``reconciled``, or when
    ``STOP_TIMER_TIMEOUT`` seconds pass without that happening,
    ``DB_CHANNEL_MSG`` is published on the ``DB_CHANNEL_NAME`` channel.
    """

    DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp'
    DB_STATE_ENTRY_NAME = 'state'
    DB_STATE_TYPE_RECONCILED = 'reconciled'
    DB_CHANNEL_NAME = 'MLNX_FFB'
    # Message format: ["<operation>","<data>"]
    DB_CHANNEL_MSG = '["SET","ISSU_END"]'
    # Passed as sleep_time to the pubsub listener thread
    SUB_THREAD_TIMEOUT = 1
    # Seconds to wait for the "reconciled" event before finishing anyway
    STOP_TIMER_TIMEOUT = 180

    def __init__(self):
        """Connect to STATE_DB, subscribe to key events and arm the timer."""
        connector = swsssdk.SonicV2Connector()
        connector.connect(connector.STATE_DB)
        self.state_db = connector

        # Remember the state seen at start-up so only a real change triggers
        self.prevState = self._read_state()

        pattern = '__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY)
        redis_client = self.state_db.redis_clients[self.state_db.STATE_DB]
        self.pubSub = redis_client.pubsub()
        self.pubSub.psubscribe(**{pattern: self.eventHandler})

        # Created here but only started in run()
        self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish)

    def _read_state(self):
        """Return the current ``state`` field of the watched STATE_DB key."""
        return self.state_db.get(
            self.state_db.STATE_DB,
            self.DB_WARM_TABLE_KEY,
            self.DB_STATE_ENTRY_NAME,
        )

    def run(self):
        """Start the event listener thread and the one-shot timeout timer."""
        # Listener thread delivers the subscribed events to eventHandler
        self.eventThread = self.pubSub.run_in_thread(
            sleep_time=self.SUB_THREAD_TIMEOUT
        )
        # One-shot timer: finish anyway if the event never arrives in time
        self.timeoutTimer.start()

    def finish(self):
        """Stop listening, wait, then publish the "FFB END" message."""
        # Stop both the event thread and the timeout timer
        self.eventThread.stop()
        self.timeoutTimer.cancel()

        # W/A: Wait until configuration is applied to HW since it takes
        # some time, then publish the "FFB END" event to the SYNCD process
        time.sleep(60)
        self.state_db.publish(
            self.state_db.STATE_DB,
            self.DB_CHANNEL_NAME,
            self.DB_CHANNEL_MSG,
        )

    def eventHandler(self, msg):
        """Handle one keyspace event; finish on a change to "reconciled"."""
        # Only "set" operations are of interest, skip everything else
        if msg['data'] != 'hset':
            return

        state = self._read_state()
        became_reconciled = (
            state == self.DB_STATE_TYPE_RECONCILED and state != self.prevState
        )

        if not became_reconciled:
            self.prevState = state
            return

        self.finish()


def main():
    """Create the FFB notifier and start its listener thread and timer."""
    notifier = FFB()
    notifier.run()


if __name__ == '__main__':
main()
Loading

0 comments on commit 75b4123

Please sign in to comment.