From 425e7db2c297ee94927de12e030d1a7c36c53930 Mon Sep 17 00:00:00 2001 From: Ric Li Date: Tue, 14 Nov 2023 09:31:31 +0800 Subject: [PATCH] docker: remove 'privileged' flag for container (#578) Signed-off-by: Ric Li --- docker/README.md | 82 +++++++++++++++++++++++++++++++++++---- docker/docker-compose.yml | 17 ++++++++ script/nicctl.sh | 45 ++++++++++++++++----- 3 files changed, 128 insertions(+), 16 deletions(-) create mode 100644 docker/docker-compose.yml diff --git a/docker/README.md b/docker/README.md index f29b2ac3e..cc3c5a94b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,6 +1,6 @@ # Docker guide -Docker guide for Intel® Media Transport Library +Docker guide for Intel® Media Transport Library. ## 1. Build Docker image @@ -11,7 +11,9 @@ docker build -t mtl:latest -f ubuntu.dockerfile ./ Refer to below build command if you are in a proxy env. ```bash -docker build -t mtl:latest -f ubuntu.dockerfile --build-arg HTTP_PROXY=http://proxy.xxx.com:xxx --build-arg HTTPS_PROXY=https://proxy.xxx.com:xxx ./ +http_proxy=http://proxy.xxx.com:xxx +https_proxy=https://proxy.xxx.com:xxx +docker build -t mtl:latest -f ubuntu.dockerfile --build-arg HTTP_PROXY=$http_proxy --build-arg HTTPS_PROXY=$https_proxy ./ ``` ## 2. DPDK NIC PMD and env setup on host @@ -24,19 +26,85 @@ The sample usage provided below is enabled with specific privileged settings suc ### 3.1 Run the docker container -The argument `/dev/vfio/` enables the Docker instance to access the VFIO device. - -The arguments `/dev/null, /tmp/kahawai_lcore.lock, and --ipc=host` and touch `/tmp/kahawai_lcore.lock` command are used for managing shared memory within IMTL, primarily for lcore management across multiple IMTL docker containers. 
+#### 3.1.1 Run multiple docker containers with SHM requirement ```bash touch /tmp/kahawai_lcore.lock -docker run --privileged -it -v /dev/vfio/:/dev/vfio/ -v /dev/null:/dev/null -v /tmp/kahawai_lcore.lock:/tmp/kahawai_lcore.lock --ipc=host mtl:latest +docker run -it \ + --device /dev/vfio \ + --cap-add SYS_NICE \ + --cap-add IPC_LOCK \ + --cap-add NET_ADMIN \ + --cap-add SYS_TIME \ + --cap-add NET_RAW \ + -v /tmp/kahawai_lcore.lock:/tmp/kahawai_lcore.lock \ + -v /dev/null:/dev/null \ + --ipc=host \ + mtl:latest ``` +Explanation of Docker arguments: + +| Argument | Description | +| --- | --- | +| `--device /dev/vfio` | Access the VFIO device | +| `--cap-add SYS_NICE` | For set_mempolicy | +| `--cap-add IPC_LOCK` | For DMA mapping | +| `--cap-add NET_ADMIN` | Optional, for kernel NIC configuration | +| `--cap-add SYS_TIME` | Optional, for system time adjustment | +| `--cap-add NET_RAW` | Optional, for AF_XDP socket | +| `-v /tmp/kahawai_lcore.lock:/tmp/kahawai_lcore.lock` | For multiple instances lcore management | +| `-v /dev/null:/dev/null` | For multiple instances lcore management | +| `--ipc=host` | For multiple instances lcore management | + +#### 3.1.2 Run single docker container + If you confirm that all IMTL processes will run within a single Docker container, you can disregard the settings related to shared memory. 
Simply execute the following command: ```bash -docker run --privileged -it -v /dev/vfio/:/dev/vfio/ mtl:latest +docker run -it \ + --device /dev/vfio \ + --cap-add SYS_NICE \ + --cap-add IPC_LOCK \ + --cap-add NET_ADMIN \ + --cap-add SYS_TIME \ + --cap-add NET_RAW \ + mtl:latest +``` + +#### 3.1.3 Specify NIC devices for container + +If you only need to pass specific NICs to the container, you can use the following command to list the IOMMU group: + +```bash +../script/nicctl.sh list all + +ID PCI BDF Driver NUMA IOMMU IF Name +0 0000:4b:01.0 vfio-pci 0 311 * +1 0000:4b:01.1 vfio-pci 0 312 * +``` + +Then, you can specify the IOMMU group IDs to the `--device` argument: + +```bash +docker run -it \ + --device /dev/vfio/vfio \ + --device /dev/vfio/311 \ + --device /dev/vfio/312 \ + --cap-add SYS_NICE \ + --cap-add IPC_LOCK \ + mtl:latest +``` + +#### 3.1.4 Run with docker-compose + +Edit the `docker-compose.yml` file to specify the configuration. + +Run the service: + +```bash +docker-compose run imtl +# docker compose run imtl ``` ### 3.2 Switch to the root user inside a Docker container diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 000000000..82293c64c --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,17 @@ +version: '3' + +services: + imtl: + image: mtl:latest + devices: + - "/dev/vfio:/dev/vfio" # or add /dev/vfio/vfio and /dev/vfio/IOMMU_GROUP_ID + ipc: host + volumes: + - "/tmp/kahawai_lcore.lock:/tmp/kahawai_lcore.lock" + - "/dev/null:/dev/null" + cap_add: + - SYS_NICE + - IPC_LOCK + - NET_ADMIN + - SYS_TIME + - NET_RAW \ No newline at end of file diff --git a/script/nicctl.sh b/script/nicctl.sh index 27544e402..08b71d8d2 100755 --- a/script/nicctl.sh +++ b/script/nicctl.sh @@ -9,14 +9,14 @@ if [ $# -lt 2 ]; then echo "Usage: " echo " $0 [args]" echo "Commands:" - echo " bind_pmd bind driver to DPDK PMD driver" - echo " bind_kernel bind driver to kernel driver" - echo " create_vf create VFs and bind to VFIO" - echo " 
create_kvf create VFs and bind to kernel driver" - echo " create_tvf create trusted VFs and bind to VFIO" - echo " create_dcf_vf create DCF VFs and bind to VFIO" + echo " bind_pmd Bind driver to DPDK PMD driver" + echo " bind_kernel Bind driver to kernel driver" + echo " create_vf Create VFs and bind to VFIO" + echo " create_kvf Create VFs and bind to kernel driver" + echo " create_tvf Create trusted VFs and bind to VFIO" + echo " create_dcf_vf Create DCF VFs and bind to VFIO" echo " disable_vf Disable VF" - echo " status List the DPDK port status" + echo " list all List all NIC devices and the brief" exit 0 fi @@ -112,7 +112,29 @@ create_kvf() { done } -cmdlist=("bind_kernel" "create_vf" "create_kvf" "create_tvf" "disable_vf" "bind_pmd" "create_dcf_vf" "status") +list() { + printf "%-4s\t%-12s\t%-12s\t%-4s\t%-6s\t%-10s\n" "ID" "PCI BDF" "Driver" "NUMA" "IOMMU" "IF Name" + + id_counter=0 + + for pci_bdf in $(dpdk-devbind.py -s | awk '/^Network devices/ {show=1; next} /^$/ {show=0} show && /drv=/ {print $1}'); do + + driver=$(basename "$(readlink /sys/bus/pci/devices/"${pci_bdf}"/driver)" 2>/dev/null || echo "N/A") + + numa_node=$(cat /sys/bus/pci/devices/"${pci_bdf}"/numa_node 2>/dev/null || echo "N/A") + + iommu_group=$(basename "$(readlink /sys/bus/pci/devices/"${pci_bdf}"/iommu_group)" 2>/dev/null || echo "N/A") + + interface_name=$(basename /sys/bus/pci/devices/"${pci_bdf}"/net/*) + + printf "%-4s\t%-12s\t%-12s\t%-4s\t%-6s\t%-10s\n" \ + "$id_counter" "$pci_bdf" "$driver" "$numa_node" "$iommu_group" "$interface_name" + + id_counter=$((id_counter + 1)) + done +} + +cmdlist=("bind_kernel" "create_vf" "create_kvf" "create_tvf" "disable_vf" "bind_pmd" "create_dcf_vf" "list") for c in "${cmdlist[@]}"; do if [ "$c" == "$1" ]; then @@ -126,6 +148,11 @@ if [ -z "$cmd" ]; then exit 1 fi +if [ "$cmd" == "list" ]; then + list + exit 0 +fi + bdf=$2 bdf_stat=$(dpdk-devbind.py -s | { grep "$bdf" || true; }) if [ -z "$bdf_stat" ]; then @@ -158,7 +185,7 @@ if [ "$cmd" == 
"bind_pmd" ]; then exit 0 fi -# suppose bind kernel should be called +# suppose bind kernel should be called for following commands if [ -z "$inf" ]; then bind_kernel inf=$(dpdk-devbind.py -s | grep "$bdf.*if" | sed -e s/.*if=//g | awk '{print $1;}')