From 8fe8a4a2b0973b065263a7cccf3b315e59e67df0 Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Fri, 9 Feb 2024 17:55:24 -0800 Subject: [PATCH] Skip loading openvswitch Kernel module if built-in If a module is built-in, trying to load the module with modprobe inside a container may fail (insted of just being a no-op). This will cause Antrea initialization to fail unless agent.dontLoadKernelModules is explicitly set to true. Now that the Docker Desktop LinuxKit VM comes with openvswitch built-in (starting with 4.27.0), trying to install "default" Antrea (i.e., without setting agent.dontLoadKernelModules) in a Kind cluster running with Docker Desktop on macOS will fail. To make sure that users will not run into this issue, we add logic to the install_cni script to skip the modprobe call if the module is built-in. After this agent, there should be very limited use cases for the agent.dontLoadKernelModules parameter, but there is no harm in keeping in case it is needed in the future or for some corner cases. I also realized that the "--skip-kmod" flag for the start_ovs script did not provide any value. Either the openvswitch module needs to be explicitly loaded, in which case the install_cni script will take care of it anyway, or it should not be loaded at all (e.g., because it is built-in). Additionally, because we do not mount the host's /lib/modules to the antrea-ovs container, it is not possible to load any kernel module from the container. Fixes #5939 Signed-off-by: Antonin Bas --- build/charts/antrea/README.md | 2 +- .../antrea/templates/agent/daemonset.yaml | 3 --- build/charts/antrea/values.yaml | 2 ++ build/images/scripts/install_cni | 21 +++++++++++++----- build/images/scripts/install_cni_chaining | 12 ++++++++-- build/images/scripts/module_utils | 19 ++++++++++++++++ build/images/scripts/start_ovs | 22 +++++++++---------- 7 files changed, 58 insertions(+), 23 deletions(-) create mode 100644 build/images/scripts/module_utils diff --git a/build/charts/antrea/README.md b/build/charts/antrea/README.md index 455524ef9bd..122d9c626f8 100644 --- a/build/charts/antrea/README.md +++ b/build/charts/antrea/README.md @@ -39,7 +39,7 @@ Kubernetes: `>= 1.16.0-0` | agent.antreaOVS.securityContext.privileged | bool | `false` | Run the antrea-ovs container as privileged. | | agent.apiPort | int | `10350` | Port for the antrea-agent APIServer to serve on. | | agent.dnsPolicy | string | `""` | DNS Policy for the antrea-agent Pods. If empty, the Kubernetes default will be used. | -| agent.dontLoadKernelModules | bool | `false` | Do not try to load any of the required Kernel modules (e.g., openvswitch) during initialization of the antrea-agent. Most users should never need to set this to true, but it may be required with some specific distributions. | +| agent.dontLoadKernelModules | bool | `false` | Do not try to load any of the required Kernel modules (e.g., openvswitch) during initialization of the antrea-agent. Most users should never need to set this to true, but it may be required with some specific distributions. Note that we will never try to load a module if we can detect that it is "built-in", regardless of this value. | | agent.enablePrometheusMetrics | bool | `true` | Enable metrics exposure via Prometheus. | | agent.extraVolumes | list | `[]` | Additional volumes for antrea-agent Pods. | | agent.installCNI.extraEnv | object | `{}` | Extra environment variables to be injected into install-cni. | diff --git a/build/charts/antrea/templates/agent/daemonset.yaml b/build/charts/antrea/templates/agent/daemonset.yaml index cc90ee4d6ea..3afd45aa41e 100644 --- a/build/charts/antrea/templates/agent/daemonset.yaml +++ b/build/charts/antrea/templates/agent/daemonset.yaml @@ -267,9 +267,6 @@ spec: {{- if .Values.ovs.hwOffload }} - "--hw-offload" {{- end }} - {{- if .Values.agent.dontLoadKernelModules }} - - "--skip-kmod" - {{- end }} {{- with .Values.agent.antreaOVS.extraArgs }} {{- toYaml . | trim | nindent 12 }} {{- end }} diff --git a/build/charts/antrea/values.yaml b/build/charts/antrea/values.yaml index afc096b8f98..09dd32a957d 100644 --- a/build/charts/antrea/values.yaml +++ b/build/charts/antrea/values.yaml @@ -245,6 +245,8 @@ agent: # -- Do not try to load any of the required Kernel modules (e.g., openvswitch) # during initialization of the antrea-agent. Most users should never need to # set this to true, but it may be required with some specific distributions. + # Note that we will never try to load a module if we can detect that it is + # "built-in", regardless of this value. dontLoadKernelModules: false installCNI: # -- Extra environment variables to be injected into install-cni. diff --git a/build/images/scripts/install_cni b/build/images/scripts/install_cni index 7e1d7d21b5f..21313327f4e 100755 --- a/build/images/scripts/install_cni +++ b/build/images/scripts/install_cni @@ -16,6 +16,8 @@ set -euo pipefail +source module_utils + # Fetching the list of the binaries that user wants to skip installing. IFS=',' read -r -a binaries <<< "${SKIP_CNI_BINARIES:-}" # Todo: check version and continue installation only for a newer version @@ -55,12 +57,21 @@ install -m 644 /etc/antrea/antrea-cni.conflist /host/etc/cni/net.d/10-antrea.con rm -f /host/etc/cni/net.d/10-antrea.conf if [[ -z "${SKIP_LOADING_KERNEL_MODULES:-}" ]]; then - # Load the OVS kernel module - modprobe openvswitch || (echo "Failed to load the OVS kernel module from the container, try running 'modprobe openvswitch' on your Nodes"; exit 1) + # Load the OVS kernel module if not built-in + if ! is_module_builtin "openvswitch"; then + modprobe openvswitch || (echo "Failed to load the OVS kernel module from the container, try running 'modprobe openvswitch' on your Nodes"; exit 1) + else + echo "Module openvswitch is built-in" + fi - # Load the WireGuard kernel module. This is only required when WireGuard encryption is enabled. - # We could parse the antrea config file in the init-container to dynamically load this kernel module in the future. - modprobe wireguard || (echo "Failed to load the WireGuard kernel module, WireGuard encryption will not be available") + # Load the WireGuard kernel module if not built-in. This is only required when WireGuard + # encryption is enabled. We could parse the antrea config file in the init-container to + # dynamically load this kernel module in the future. + if ! is_module_builtin "wireguard"; then + modprobe wireguard || (echo "Failed to load the WireGuard kernel module, WireGuard encryption will not be available") + else + echo "Module wireguard is built-in" + fi fi # Change the default permissions of the run directory. diff --git a/build/images/scripts/install_cni_chaining b/build/images/scripts/install_cni_chaining index 8ef6a8aa5cb..f364a8f730a 100755 --- a/build/images/scripts/install_cni_chaining +++ b/build/images/scripts/install_cni_chaining @@ -15,6 +15,7 @@ # limitations under the License. source logging +source module_utils HOST_CNI_NET_DIR="/host/etc/cni/net.d" @@ -124,8 +125,15 @@ update_cni_conf install -m 755 /usr/local/bin/antrea-cni /host/opt/cni/bin/antrea id -# Load the OVS kernel module -modprobe openvswitch || { echo "Failed to load the OVS kernel module from the container, try running 'modprobe openvswitch' on your Nodes"; exit 1;} + +if [[ -z "${SKIP_LOADING_KERNEL_MODULES:-}" ]]; then + # Load the OVS kernel module if not built-in + if ! is_module_builtin "openvswitch"; then + modprobe openvswitch || { echo "Failed to load the OVS kernel module from the container, try running 'modprobe openvswitch' on your Nodes"; exit 1;} + else + log_info "install_cni_chaining" "Module openvswitch is built-in" + fi +fi if [[ "$run_monitor" == "false" ]]; then exit 0 diff --git a/build/images/scripts/module_utils b/build/images/scripts/module_utils new file mode 100644 index 00000000000..12783ba1d59 --- /dev/null +++ b/build/images/scripts/module_utils @@ -0,0 +1,19 @@ +# Copyright 2024 Antrea Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +function is_module_builtin { + local module="$1" + local path="/lib/modules/$(uname -r)/modules.builtin" + grep -q "$module\.ko" "$path" +} diff --git a/build/images/scripts/start_ovs b/build/images/scripts/start_ovs index f9970a8cfc4..daff5c5a7df 100755 --- a/build/images/scripts/start_ovs +++ b/build/images/scripts/start_ovs @@ -24,7 +24,6 @@ OVS_DB_FILE="${OVS_RUN_DIR}/conf.db" OVS_LOGROTATE_CONF="/etc/logrotate.d/openvswitch-switch" hw_offload="false" -skip_kmod="false" log_file_max_num=0 log_file_max_size=0 @@ -32,7 +31,6 @@ function usage { echo "start_ovs" echo -e " -h|--help\t\t \tPrint help message" echo -e " --hw-offload\t\t \tEnable OVS hardware offload" - echo -e " --skip-kmod\t\t \tForce skip Kernel module loading in OVS start script" echo -e " --log_file_max_num= \tMaximum number of log files to be kept for an OVS daemon. Value 0 means keeping the current value" echo -e " --log_file_max_size= \tMaximum size (in megabytes) of an OVS log file. Value 0 means keeping the current value" } @@ -47,7 +45,7 @@ while (( "$#" )); do hw_offload="true" ;; --skip-kmod) - skip_kmod="true" + echo "--skip-kmod is deprecated and will be removed in the future; it is now the default behavior" >&2 ;; --log_file_max_num=*) log_file_max_num=$1 @@ -149,15 +147,15 @@ set -euo pipefail # exit with code 128 + SIGNAL trap "quit" INT TERM -if [ "$skip_kmod" == "true" ]; then - # ovs-ctl start will invoke ovs-kmod-ctl to load the openvswitch Kernel module if necessary - # (using modprobe). In some cases, this can fail unexpectedly, for example, with Talos Linux - # (see https://github.com/antrea-io/antrea/issues/5707). This is why this script offers the - # skip-kmod flag, which prevents the ovs-ctl script from trying to load any Kernel module. In - # order for this to work, we need to turn ovs-kmod-ctl into a "no-op". - cp /usr/share/openvswitch/scripts/ovs-kmod-ctl /usr/share/openvswitch/scripts/ovs-kmod-ctl.bak - echo ":" > /usr/share/openvswitch/scripts/ovs-kmod-ctl -fi +# ovs-ctl start will invoke ovs-kmod-ctl to load the openvswitch Kernel module +# if necessary (using modprobe). +# We want to avoid this behavior for the following reasons: +# * the initContainer is already in charge of loading the module if necessary +# * we do not mount the host's /lib/module directory into the antrea-ovs container +# * in some cases, modprobe will fail if the module is built-in +# Therefore, we turn ovs-kmod-ctl into a "no-op". +cp /usr/share/openvswitch/scripts/ovs-kmod-ctl /usr/share/openvswitch/scripts/ovs-kmod-ctl.bak +echo ":" > /usr/share/openvswitch/scripts/ovs-kmod-ctl update_logrotate_config_file