Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Nvidia-bluefield] Add infrastructure for the DPU NIC FW auto upgrade #20074

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions device/nvidia-bluefield/arm64-nvda_bf-bf3comdpu/pre_reboot_hook
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pre-reboot hook: runs the NIC FW upgrade script before the reboot proceeds.
# Exits with EXIT_SUCCESS when the upgrade script returns 0; otherwise prints
# the script's exit status and exits with EXIT_ERROR.

readonly EXIT_SUCCESS=0
readonly EXIT_ERROR=1

readonly FW_UPGRADE_SCRIPT="/usr/bin/mlnx-fw-upgrade.sh"

"${FW_UPGRADE_SCRIPT}" --upgrade --verbose
fw_rc=$?

# Happy path first: nothing more to do when the upgrade script succeeded.
if (( fw_rc == EXIT_SUCCESS )); then
  exit "${EXIT_SUCCESS}"
fi

echo "Failed to burn BF3 DPU NIC FW: errno=${fw_rc}"
exit "${EXIT_ERROR}"

13 changes: 9 additions & 4 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -1115,19 +1115,24 @@ sudo cp platform/mellanox/smartswitch/dpuctl/dpuctl.service $FILESYSTEM_ROOT_USR
declare -rA FW_FILE_MAP=( \
[$BF3_FW_FILE]="fw-BF3.mfa" \
)
sudo mkdir -p $FILESYSTEM_ROOT/$PLATFORM_DIR/fw/dpu/
sudo mkdir -p $FILESYSTEM_ROOT/etc/bluefield/
sudo mkdir -p $FILESYSTEM_ROOT/$PLATFORM_DIR/fw/asic/
sudo mkdir -p $FILESYSTEM_ROOT/etc/mlnx/
for fw_file_name in ${!FW_FILE_MAP[@]}; do
sudo cp $files_path/$fw_file_name $FILESYSTEM_ROOT/$PLATFORM_DIR/fw/dpu/${FW_FILE_MAP[$fw_file_name]}
sudo ln -s /host/image-$SONIC_IMAGE_VERSION/$PLATFORM_DIR/fw/dpu/${FW_FILE_MAP[$fw_file_name]} $FILESYSTEM_ROOT/etc/bluefield/${FW_FILE_MAP[$fw_file_name]}
sudo cp $files_path/$fw_file_name $FILESYSTEM_ROOT/$PLATFORM_DIR/fw/asic/${FW_FILE_MAP[$fw_file_name]}
sudo ln -s /host/image-$SONIC_IMAGE_VERSION/$PLATFORM_DIR/fw/asic/${FW_FILE_MAP[$fw_file_name]} $FILESYSTEM_ROOT/etc/mlnx/${FW_FILE_MAP[$fw_file_name]}
done

SONIC_PLATFORM={{sonic_asic_platform}} j2 platform/mellanox/mlnx-fw-upgrade.j2 | sudo tee $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh

sudo install -m 755 platform/nvidia-bluefield/byo/sonic-byo.py $FILESYSTEM_ROOT/usr/bin/sonic-byo.py

SONIC_PLATFORM_PY3_WHEEL_NAME=$(basename {{platform_api_py3_wheel_path}})
sudo cp {{platform_api_py3_wheel_path}} $FILESYSTEM_ROOT/$SONIC_PLATFORM_PY3_WHEEL_NAME
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install $SONIC_PLATFORM_PY3_WHEEL_NAME
sudo rm -rf $FILESYSTEM_ROOT/$SONIC_PLATFORM_PY3_WHEEL_NAME

sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install xmlstarlet
{% endif %}

{%- if SONIC_ROUTING_STACK == "frr" %}
Expand Down
4 changes: 4 additions & 0 deletions files/scripts/syncd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ function startplatform() {

if [[ x"$sonic_asic_platform" == x"nvidia-bluefield" ]]; then
/usr/bin/bfnet.sh start
if [[ $? != "0" ]]; then
debug "Failed to start Nvidia Bluefield"
exit 1
fi
fi
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ start()
modprobe mlx5_core
/usr/bin/mst start

/usr/bin/mlnx-fw-upgrade.sh --dry-run -v
if [[ $? != "0" ]]; then
exit 1
fi

hwsku=$(sonic-cfggen -d -v 'DEVICE_METADATA["localhost"]["hwsku"]')
if [[ $hwsku == *"-C1" ]]; then
start_cp_dhclient
Expand Down
102 changes: 36 additions & 66 deletions platform/nvidia-bluefield/installer/create_sonic_image
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# limitations under the License.
#

set -x
set -e

WDIR=
Expand Down Expand Up @@ -184,34 +183,14 @@ add_sonic_to_initramfs() {
cat > scripts/initrd-install << EOF
#!/bin/bash

printf_msg()
{
echo "$@" | tee /dev/kmsg
return 0
}

depmod -a $KERNEL_VERSION > /dev/null 2>&1
insmod /mlx-bootctl.ko
insmod /sdhci-of-dwcmshc.ko
insmod /sbsa_gwdt.ko
/usr/sbin/watchdog

printf_msg "================================="
printf_msg "Installing SONiC. Please wait..."
printf_msg "================================="

/bin/bash /debian/install.sh
if [ \$? -eq 0 ]; then
printf_msg "==================================="
printf_msg "Installation finished. Rebooting..."
printf_msg "==================================="
printf_msg
reboot -f
else
printf_msg "========================"
printf_msg "Failed to install SONiC"
printf_msg "========================"
fi

EOF
chmod +x scripts/initrd-install
sudo rm -f conf/conf.d/debian-core*
Expand All @@ -221,63 +200,37 @@ EOF

copy_bin()
{
from=$(realpath $1)

if [ -e $from/$2 ]; then
bin=$2
if [ -e $1 ]; then
bin=$1
else
bin=$(sudo chroot $from bash -c "which $2 2> /dev/null")
bin=$(which $1 2> /dev/null)
fi

echo "copy from" $from file: $bin

if [ -h $from/$bin ]; then
if [[ $from == "/" ]]; then
tbin=`readlink -f $bin`
else
tbin=`readlink -f $from/$bin | sed -e "s~$from~~"`
fi

if [ ! -e .$tbin ]; then
sudo mkdir -p .`dirname $tbin`
sudo cp $from/$tbin .$tbin
fi

if [ ! -e .${bin} ]; then
sudo mkdir -p .`dirname $bin`
sudo cp -a $from/$bin .${bin}
fi
elif [ -e $from/$bin ]; then
sudo mkdir -p .`dirname $bin`
sudo cp -a $from/$bin .${bin}
else
echo "ERROR: Cannot find $2"
if [ -z "$bin" ]; then
echo "ERROR: Cannot find $1"
exit 1
fi
sudo mkdir -p .$(dirname $bin)
if [ ! -e .${bin} ]; then
sudo cp -a $bin .${bin}
fi

# Copy dependencies
for lib in `ldd $from/$bin 2> /dev/null | grep '=>' | awk '{print $3}'`
for lib in $(ldd $bin 2> /dev/null | grep '=>' | awk '{print $3}')
do
if [ -e .$lib ]; then
continue
fi

sudo mkdir -p .`dirname $lib`
sudo cp -a $from/$lib .$lib
echo sudo cp -a $from/$lib .$lib
if [ -h $from/.$lib ]; then
if [[ $from == "/" ]]; then
tlib=`readlink -f $lib`
else
tlib=`readlink -f $from/$lib | sed -e "s~$from~~"`
fi
sudo mkdir -p .$(dirname $lib)
sudo cp -a $lib .$lib
if [ -h $lib ]; then
tlib=$(readlink -f $lib)
if [ ! -e .$tlib ]; then
sudo mkdir -p .`dirname $tlib`
sudo cp $from/$tlib .$tlib
sudo mkdir -p .$(dirname $tlib)
sudo cp $tlib .$tlib
fi
fi
done
}
}

create_bfb_image() {

Expand Down Expand Up @@ -336,7 +289,24 @@ create_bfb_image() {
if [ -d $tool ]; then
continue
fi
copy_bin / $tool
copy_bin $tool
done

kernel_mft=$(dpkg -l | grep kernel-mft-dkms-modules | awk '/^ii/ {print $2}')
if [[ $kernel_mft == "" ]]; then
echo "ERROR: kernel-mft-dkms-modules package is not installed"
exit 1
fi

for tool in `dpkg -L mft` \
`dpkg -L mft-oem` \
`dpkg -L $kernel_mft` \
`dpkg -L xmlstarlet | grep -v share`
do
if [ -d $tool ]; then
continue
fi
copy_bin $tool
done

sudo depmod -a -b ./ $KERNEL_VERSION
Expand Down
101 changes: 74 additions & 27 deletions platform/nvidia-bluefield/installer/install.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,43 @@
# This will setup the disk, grub etc for the actual SONiC to boot from

# NOTE: Replace these flags at build time
IMAGE_VERSION="{{IMAGE_VERSION}}"
INSTALLER_PAYLOAD="{{INSTALLER_PAYLOAD}}"
FILESYSTEM_DOCKERFS="{{FILESYSTEM_DOCKERFS}}"
DOCKERFS_DIR="{{DOCKERFS_DIR}}"
FILESYSTEM_SQUASHFS="{{FILESYSTEM_SQUASHFS}}"
KERNEL_VERSION="{{KERNEL_VERSION}}"
BF2_GRUB_CFG="{{BF2_GRUB_CFG}}"
BF3_GRUB_CFG="{{BF3_GRUB_CFG}}"

image_dir="image-$IMAGE_VERSION"
demo_volume_revision_label="SONiC-OS-${IMAGE_VERSION}"

PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/mellanox/scripts"
CHROOT_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

rshimlog=`which bfrshlog 2> /dev/null`
distro="SONiC"
declare -r IMAGE_VERSION="{{IMAGE_VERSION}}"
declare -r INSTALLER_PAYLOAD="{{INSTALLER_PAYLOAD}}"
declare -r FILESYSTEM_DOCKERFS="{{FILESYSTEM_DOCKERFS}}"
declare -r DOCKERFS_DIR="{{DOCKERFS_DIR}}"
declare -r FILESYSTEM_SQUASHFS="{{FILESYSTEM_SQUASHFS}}"
declare -r KERNEL_VERSION="{{KERNEL_VERSION}}"
declare -r BF2_GRUB_CFG="{{BF2_GRUB_CFG}}"
declare -r BF3_GRUB_CFG="{{BF3_GRUB_CFG}}"

declare -r image_dir="image-$IMAGE_VERSION"
declare -r demo_volume_revision_label="SONiC-OS-${IMAGE_VERSION}"

declare -r rshimlog=`which bfrshlog 2> /dev/null`
declare -r distro="SONiC"

declare -r device_nvmv=/dev/nvme0n1
declare -r device_emmc=/dev/mmcblk0

pn=$(dmidecode -t 4 | grep "Part Number" | awk '{split($NF,a,"-"); print tolower(a[1])}')
declare -r platform=arm64-nvda_bf-$pn

# The reboot of the DPU from the installer is required only for the standalone platform.
# In a Smart Switch, the reboot is triggered by the host when the DPU notifies it that the installation has finished.
# Select the post-install behaviour from the platform string: a platform
# containing "9009d3b600" reboots by itself and skips the FW upgrade; any
# other platform runs the FW upgrade and does not reboot by itself.
case "${platform}" in
  *9009d3b600*)
    declare -r reboot_is_needed=true
    declare -r fw_upgrade_is_needed=false
    ;;
  *)
    declare -r reboot_is_needed=false
    declare -r fw_upgrade_is_needed=true
    ;;
esac

device_nvmv=/dev/nvme0n1
device_emmc=/dev/mmcblk0
declare -r capsule=/lib/firmware/mellanox/boot/capsule/boot_update2.cap

capsule=/lib/firmware/mellanox/boot/capsule/boot_update2.cap
# Run /bin/bash with stdin, stdout and stderr all attached to /dev/ttyAMA0.
# Returns the exit status of that bash process.
run_bash_session()
{
  local -r console_tty=/dev/ttyAMA0

  /bin/bash <"${console_tty}" >"${console_tty}" 2>&1
}

rshim_log()
{
Expand Down Expand Up @@ -71,7 +86,7 @@ function_exists()
# Check auto configuration passed from boot-fifo
#

boot_fifo_path="/sys/bus/platform/devices/MLNXBF04:00/bootfifo"
declare -r boot_fifo_path="/sys/bus/platform/devices/MLNXBF04:00/bootfifo"
if [ -e "${boot_fifo_path}" ]; then
cfg_file=$(mktemp)
# Get 16KB assuming it's big enough to hold the config file.
Expand Down Expand Up @@ -203,9 +218,6 @@ unzip -op /debian/$INSTALLER_PAYLOAD "$FILESYSTEM_DOCKERFS" | tar xz --warning=n
mkdir -p /mnt/$image_dir/platform
unzip -op /debian/$INSTALLER_PAYLOAD "platform.tar.gz" | tar xz --warning=no-timestamp -f - -C /mnt/$image_dir/platform

platform=$(dmidecode -t 4 | grep "Part Number" | awk '{split($NF,a,"-"); print tolower(a[1])}')
platform=arm64-nvda_bf-$platform

# Copy in the machine.conf file
cat <<EOF > /mnt/machine.conf
onie_arch=arm64
Expand All @@ -215,6 +227,33 @@ EOF
chmod a+r /mnt/machine.conf

sync

# NIC FW update step; executed only when fw_upgrade_is_needed is "true".
if [[ $fw_upgrade_is_needed == "true" ]]; then
# Path of the image's squashfs under /mnt and a mount point for it under /tmp.
sonic_fs_path="/mnt/$image_dir/fs.squashfs"
sonic_fs_mountpoint="/tmp/$image_dir-fs"

# Mount the squashfs so that the upgrade script it contains can be executed.
ex mkdir -p $sonic_fs_mountpoint
ex mount -t squashfs $sonic_fs_path $sonic_fs_mountpoint

ex mkdir -p /etc/mlnx/

# Link the BF3 FW file from the extracted platform directory into /etc/mlnx/.
# NOTE(review): presumably /etc/mlnx/ is where mlnx-fw-upgrade.sh looks for the FW file - confirm.
ex ln -s /mnt/$image_dir/platform/fw/asic/fw-BF3.mfa /etc/mlnx/fw-BF3.mfa

ex mst start

# Run the upgrade script from the mounted squashfs.
# NOTE(review): pre_reboot_hook invokes the same script with --upgrade; confirm that
# --update is an option the script actually accepts.
# NOTE(review): `ex` is defined outside this block; the $? check below assumes it
# returns the wrapped command's status instead of aborting - confirm.
ex $sonic_fs_mountpoint/usr/bin/mlnx-fw-upgrade.sh --update -v
if [[ $? != 0 ]]; then
# A failure is only logged here; the squashfs is still unmounted below.
log "ERROR: FW update failed"
fi

ex umount $sonic_fs_mountpoint
fi

if function_exists bfb_post_sonic_install; then
log "Running bfb_post_sonic_install from bf.cfg"
bfb_post_sonic_install
fi

{% if SECURE_UPGRADE_MODE in ['dev', 'prod'] %}
demo_volume_label="SONiC-OS"
log "creating demo_volume_label=$demo_volume_label dir under EFI partition to include all boot related modules"
Expand Down Expand Up @@ -384,7 +423,15 @@ if function_exists bfb_post_install; then
ex bfb_post_install
fi


rshim_log "Installation finished"
rshim_log "Rebooting..."
# Wait for these messages to be pulled by the rshim service
sleep 3

# Final step of the installer: either force a reboot from here, or idle
# forever until something external resets the device.
case "${reboot_is_needed}" in
  true)
    rshim_log "Rebooting..."
    reboot -f
    ;;
  *)
    rshim_log "Waiting for reset from the host..."
    # Idle indefinitely; this loop never exits on its own.
    while :; do
      sleep 1
    done
    ;;
esac
2 changes: 1 addition & 1 deletion platform/nvidia-bluefield/recipes/installer-image.mk
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ SONIC_BF_IMAGE_BFB = $(SONIC_BF_IMAGE_BASE).bfb
$(SONIC_BF_IMAGE_BFB)_IMAGE_TYPE = bfb
$(SONIC_BF_IMAGE_BFB)_MACHINE = $($(SONIC_BF_IMAGE_BASE)_MACHINE)
$(SONIC_BF_IMAGE_BFB)_INSTALLS += $($(SONIC_BF_IMAGE_BASE)_INSTALLS)
$(SONIC_BF_IMAGE_BFB)_DEPENDS += $($(SONIC_BF_IMAGE_BASE)_DEPENDS)
$(SONIC_BF_IMAGE_BFB)_DEPENDS += $($(SONIC_BF_IMAGE_BASE)_DEPENDS) $(MFT) $(MFT_OEM) $(KERNEL_MFT)
$(SONIC_BF_IMAGE_BFB)_DOCKERS += $($(SONIC_BF_IMAGE_BASE)_DOCKERS)
$(SONIC_BF_IMAGE_BFB)_LAZY_INSTALLS += $($(SONIC_BF_IMAGE_BASE)_LAZY_INSTALLS)
$(SONIC_BF_IMAGE_BFB)_FILES += $($(SONIC_BF_IMAGE_BASE)_FILES)
Expand Down
6 changes: 4 additions & 2 deletions sonic-slave-bookworm/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ RUN apt-get update && apt-get install -y eatmydata && eatmydata apt-get install
libprotobuf-dev \
# For sonic-dhcp6relay build
libjsoncpp-dev \
#for nvidia-blufield driver compilation
#for nvidia-bluefield driver compilation
pciutils \
dh-dkms \
rpm2cpio \
Expand All @@ -441,7 +441,9 @@ RUN apt-get update && apt-get install -y eatmydata && eatmydata apt-get install
efibootmgr \
watchdog \
dmidecode \
# For nvidia-blufield sdk compilation
usbutils \
xmlstarlet \
# For nvidia-bluefield sdk compilation
cython3 \
pandoc \
valgrind \
Expand Down
Loading