From 5db5adf7a5e1490e7c91ba01529aaa7db14838b6 Mon Sep 17 00:00:00 2001 From: Jianjun Shen Date: Mon, 31 Jul 2023 17:19:51 -0400 Subject: [PATCH] Support Pod secondary interfaces on VLAN network This commit adds support for connecting Pod secondary interfaces to a VLAN network. A Pod secondary interface configured with the VLAN networkType will be connected to the secondary network OVS bridge, and the specified VLAN tag will be set on the OVS port. This commit mostly follows the existing SR-IOV secondary network implementation, in which a Pod controller configures Pod secondary interfaces based on the local Pod events. It thus inherits the existing limitations, like secondary interface configuration is not persisted on the Node and may get lost after antrea-agent restarts. Signed-off-by: Jianjun Shen --- cmd/antrea-agent/agent.go | 2 - .../interface_configuration_linux.go | 5 +- pkg/agent/cniserver/pod_configuration.go | 5 +- .../cniserver/pod_configuration_linux_test.go | 6 +- pkg/agent/cniserver/secondary.go | 103 +++++ pkg/agent/cniserver/server.go | 13 +- pkg/agent/cniserver/server_linux_test.go | 13 +- pkg/agent/cniserver/sriov.go | 117 ------ .../secondarynetwork/cnipodcache/cache.go | 12 +- .../secondarynetwork/cnipodcache/types.go | 16 +- pkg/agent/secondarynetwork/init.go | 48 +-- pkg/agent/secondarynetwork/init_test.go | 9 +- .../secondarynetwork/podwatch/controller.go | 240 ++++++------ .../podwatch/controller_test.go | 353 ++++++++++++++---- pkg/agent/secondarynetwork/podwatch/sriov.go | 173 +++++++++ .../podwatch/testing/mock_podwatch.go | 31 +- pkg/agent/secondarynetwork/podwatch/types.go | 12 +- pkg/agent/util/net.go | 21 +- pkg/agent/util/net_test.go | 91 +++++ 19 files changed, 896 insertions(+), 374 deletions(-) create mode 100644 pkg/agent/cniserver/secondary.go delete mode 100644 pkg/agent/cniserver/sriov.go create mode 100644 pkg/agent/secondarynetwork/podwatch/sriov.go diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 196eb8a417e..4f628f86dbd 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -694,8 +694,6 @@ func run(o *Options) error { if err := secondarynetwork.Initialize( o.config.ClientConnection, o.config.KubeAPIServerOverride, k8sClient, localPodInformer, nodeConfig.Name, cniPodInfoStore, - // safe to call given that cniServer.Initialize has been called already. - cniServer.GetPodConfigurator(), stopCh, &o.config.SecondaryNetwork, ovsdbConnection); err != nil { return fmt.Errorf("failed to initialize secondary network: %v", err) diff --git a/pkg/agent/cniserver/interface_configuration_linux.go b/pkg/agent/cniserver/interface_configuration_linux.go index 9ce9ec62c5e..0927249e4b7 100644 --- a/pkg/agent/cniserver/interface_configuration_linux.go +++ b/pkg/agent/cniserver/interface_configuration_linux.go @@ -249,7 +249,10 @@ func (ic *ifConfigurator) configureContainerLinkVeth( mtu int, result *current.Result, ) error { - hostIfaceName := util.GenerateContainerInterfaceName(podName, podNamespace, containerID) + // Include the container veth interface name in the name generation, as one Pod can have more + // than one interfaces inc. secondary interfaces, while the host interface name must be + // be unique. + hostIfaceName := util.GenerateContainerHostVethName(podName, podNamespace, containerID, containerIfaceName) hostIface := ¤t.Interface{Name: hostIfaceName} containerIface := ¤t.Interface{Name: containerIfaceName, Sandbox: containerNetNS} diff --git a/pkg/agent/cniserver/pod_configuration.go b/pkg/agent/cniserver/pod_configuration.go index 1e0782dfe88..ac4e183c91a 100644 --- a/pkg/agent/cniserver/pod_configuration.go +++ b/pkg/agent/cniserver/pod_configuration.go @@ -84,9 +84,9 @@ func newPodConfigurator( gatewayMAC net.HardwareAddr, ovsDatapathType ovsconfig.OVSDatapathType, isOvsHardwareOffloadEnabled bool, + disableTXChecksumOffload bool, podUpdateNotifier channel.Notifier, podInfoStore cnipodcache.CNIPodInfoStore, - disableTXChecksumOffload bool, ) (*podConfigurator, error) { ifConfigurator, err := newInterfaceConfigurator(ovsDatapathType, isOvsHardwareOffloadEnabled, disableTXChecksumOffload) if err != nil { @@ -259,7 +259,8 @@ func (pc *podConfigurator) configureInterfaces( // Note that the IP address should be advertised after Pod OpenFlow entries are installed, otherwise the packet might // be dropped by OVS. if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, &result.Result); err != nil { - klog.Errorf("Failed to advertise IP address for container %s: %v", containerID, err) + // Do not return an error and fail the interface creation. + klog.ErrorS(err, "Failed to advertise IP address for container", "container ID", containerID) } // Mark the manipulation as success to cancel deferred operations. success = true diff --git a/pkg/agent/cniserver/pod_configuration_linux_test.go b/pkg/agent/cniserver/pod_configuration_linux_test.go index 66b94f7f6e9..aedcdaae97f 100644 --- a/pkg/agent/cniserver/pod_configuration_linux_test.go +++ b/pkg/agent/cniserver/pod_configuration_linux_test.go @@ -476,7 +476,9 @@ func TestConfigureSriovSecondaryInterface(t *testing.T) { name: "advertise-failure", podSriovVFDeviceID: "vf2", advertiseErr: fmt.Errorf("unable to advertise on the sriov link"), - expectedErr: fmt.Errorf("failed to advertise IP address for container %s: unable to advertise on the sriov link", containerID), + // When advertiseContainerAddr returns an error, it is logged, but does not + // cause ConfigureSriovSecondaryInterface to also return an error. + }, { name: "success", podSriovVFDeviceID: "vf3", @@ -499,7 +501,7 @@ func createPodConfigurator(controller *gomock.Controller, testIfaceConfigurator mockOFClient = openflowtest.NewMockClient(controller) ifaceStore = interfacestore.NewInterfaceStore() mockRoute = routetest.NewMockInterface(controller) - configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) configurator.ifConfigurator = testIfaceConfigurator return configurator } diff --git a/pkg/agent/cniserver/secondary.go b/pkg/agent/cniserver/secondary.go new file mode 100644 index 00000000000..11fc559f893 --- /dev/null +++ b/pkg/agent/cniserver/secondary.go @@ -0,0 +1,103 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cniserver + +import ( + "fmt" + + current "github.com/containernetworking/cni/pkg/types/100" + "k8s.io/klog/v2" + + "antrea.io/antrea/pkg/ovs/ovsconfig" +) + +func NewSecondaryInterfaceConfigurator(ovsBridgeClient ovsconfig.OVSBridgeClient) (*podConfigurator, error) { + return newPodConfigurator(ovsBridgeClient, nil, nil, nil, nil, ovsconfig.OVSDatapathSystem, false, false, nil, nil) +} + +// ConfigureSriovSecondaryInterface configures a SR-IOV secondary interface for a Pod. +func (pc *podConfigurator) ConfigureSriovSecondaryInterface( + podName, podNamespace string, + containerID, containerNetNS, containerInterfaceName string, + mtu int, + podSriovVFDeviceID string, + result *current.Result) error { + if podSriovVFDeviceID == "" { + return fmt.Errorf("error getting the Pod SR-IOV VF device ID") + } + + err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerInterfaceName, mtu, "", podSriovVFDeviceID, result, nil) + if err != nil { + return err + } + hostIface := result.Interfaces[0] + containerIface := result.Interfaces[1] + klog.InfoS("Configured SR-IOV interface", "Pod", klog.KRef(podNamespace, podName), "interface", containerInterfaceName, "hostInterface", hostIface) + + if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { + klog.ErrorS(err, "Failed to advertise IP address for SR-IOV interface", "container ID", containerID, "interface", containerInterfaceName) + } + return nil +} + +// ConfigureVLANSecondaryInterface configures a VLAN secondary interface on the secondary network +// OVS bridge, and returns the OVS port UUID. +func (pc *podConfigurator) ConfigureVLANSecondaryInterface( + podName, podNamespace string, + containerID, containerNetNS, containerInterfaceName string, + mtu int, vlanID uint16, + result *current.Result) (string, error) { + // TODO: revisit the possibility of reusing configureInterfaces(), connectInterfaceToOVS() + // removeInterfaces() code, and using InterfaceStore to store secondary interface info. + if err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerInterfaceName, mtu, "", "", result, nil); err != nil { + return "", err + } + hostIface := result.Interfaces[0] + containerIface := result.Interfaces[1] + + success := false + defer func() { + if !success { + if err := pc.ifConfigurator.removeContainerLink(containerID, hostIface.Name); err != nil { + klog.ErrorS(err, "failed to roll back veth creation", "container ID", containerID, "interface", containerInterfaceName) + } + } + }() + + // Use the outer veth interface name as the OVS port name. + ovsPortName := hostIface.Name + ovsPortUUID, err := pc.createOVSPort(ovsPortName, nil, vlanID) + if err != nil { + return "", fmt.Errorf("failed to add OVS port for container %s: %v", containerID, err) + } + klog.InfoS("Configured VLAN interface", "Pod", klog.KRef(podNamespace, podName), "interface", containerInterfaceName, "hostInterface", hostIface) + + if err := pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { + klog.ErrorS(err, "Failed to advertise IP address for VLAN interface", "container ID", containerID, "interface", containerInterfaceName) + } + success = true + return ovsPortUUID, nil +} + +// DeleteVLANSecondaryInterface deletes a VLAN secondary interface. +func (pc *podConfigurator) DeleteVLANSecondaryInterface(containerID, hostInterfaceName, ovsPortUUID string) error { + if err := pc.ovsBridgeClient.DeletePort(ovsPortUUID); err != nil { + return fmt.Errorf("failed to delete OVS port for container %s: %v", containerID, err) + } + if err := pc.ifConfigurator.removeContainerLink(containerID, hostInterfaceName); err != nil { + return err + } + return nil +} diff --git a/pkg/agent/cniserver/server.go b/pkg/agent/cniserver/server.go index 995cc915852..41b1c4abaf7 100644 --- a/pkg/agent/cniserver/server.go +++ b/pkg/agent/cniserver/server.go @@ -387,10 +387,6 @@ func (s *CNIServer) validatePrevResult(cfgArgs *cnipb.CniCmdArgs, prevResult *cu return nil } -func (s *CNIServer) GetPodConfigurator() *podConfigurator { - return s.podConfigurator -} - // Declared variables for testing var ( ipamSecondaryNetworkAdd = ipam.SecondaryNetworkAdd @@ -531,8 +527,7 @@ func (s *CNIServer) CmdAdd(ctx context.Context, request *cnipb.CniCmdRequest) (* if s.secondaryNetworkEnabled { // Go cache the CNI server info at CNIConfigInfo cache, for podWatch usage cniInfo := &cnipodcache.CNIConfigInfo{CNIVersion: cniVersion, PodName: podName, PodNamespace: podNamespace, - ContainerID: cniConfig.ContainerId, ContainerNetNS: netNS, PodCNIDeleted: false, - MTU: cniConfig.MTU} + ContainerID: cniConfig.ContainerId, ContainerNetNS: netNS, PodCNIDeleted: false} s.podConfigurator.podInfoStore.AddCNIConfigInfo(cniInfo) } @@ -668,9 +663,9 @@ func (s *CNIServer) Initialize( s.podConfigurator, err = newPodConfigurator( ovsBridgeClient, ofClient, s.routeClient, ifaceStore, s.nodeConfig.GatewayConfig.MAC, - ovsBridgeClient.GetOVSDatapathType(), ovsBridgeClient.IsHardwareOffloadEnabled(), podUpdateNotifier, - podInfoStore, s.disableTXChecksumOffload, - ) + ovsBridgeClient.GetOVSDatapathType(), ovsBridgeClient.IsHardwareOffloadEnabled(), + s.disableTXChecksumOffload, + podUpdateNotifier, podInfoStore) if err != nil { return fmt.Errorf("error during initialize podConfigurator: %v", err) } diff --git a/pkg/agent/cniserver/server_linux_test.go b/pkg/agent/cniserver/server_linux_test.go index 9acabb19443..c0fd78cea88 100644 --- a/pkg/agent/cniserver/server_linux_test.go +++ b/pkg/agent/cniserver/server_linux_test.go @@ -98,7 +98,7 @@ func TestValidatePrevResult(t *testing.T) { cniConfig.Ifname = ifname cniConfig.Netns = "invalid_netns" sriovVFDeviceID := "" - cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) response := cniServer.validatePrevResult(cniConfig.CniCmdArgs, prevResult, sriovVFDeviceID) checkErrorResponse(t, response, cnipb.ErrorCode_CHECK_INTERFACE_FAILURE, "") }) @@ -109,7 +109,7 @@ func TestValidatePrevResult(t *testing.T) { cniConfig.Netns = "invalid_netns" sriovVFDeviceID := "0000:03:00.6" prevResult.Interfaces = []*current.Interface{hostIface, containerIface} - cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", true, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", true, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) response := cniServer.validatePrevResult(cniConfig.CniCmdArgs, prevResult, sriovVFDeviceID) checkErrorResponse(t, response, cnipb.ErrorCode_CHECK_INTERFACE_FAILURE, "") }) @@ -122,7 +122,7 @@ func TestRemoveInterface(t *testing.T) { ifaceStore = interfacestore.NewInterfaceStore() mockRoute = routetest.NewMockInterface(controller) gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") - podConfigurator, err := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + podConfigurator, err := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) require.Nil(t, err, "No error expected in podConfigurator constructor") containerMAC, _ := net.ParseMAC("aa:bb:cc:dd:ee:ff") @@ -203,7 +203,7 @@ func newMockCNIServer(t *testing.T, controller *gomock.Controller, ipamDriver ip gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") gateway := &config.GatewayConfig{Name: "", IPv4: gwIPv4, MAC: gwMAC} cniServer.nodeConfig = &config.NodeConfig{Name: "node1", PodIPv4CIDR: nodePodCIDRv4, GatewayConfig: gateway} - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) cniServer.enableSecondaryNetworkIPAM = enableSecondaryNetworkIPAM cniServer.isChaining = isChaining cniServer.secondaryNetworkEnabled = secondaryNetworkEnabled @@ -494,8 +494,7 @@ func TestCmdDel(t *testing.T) { cniserver.podConfigurator.ifConfigurator = testIfaceConfigurator if tc.secondaryNetworkEnabled { cniInfo := &cnipodcache.CNIConfigInfo{CNIVersion: supportedCNIVersion, PodName: tc.podName, PodNamespace: testPodNamespace, - ContainerID: containerID, ContainerNetNS: netns, PodCNIDeleted: false, - MTU: 1450} + ContainerID: containerID, ContainerNetNS: netns, PodCNIDeleted: false} cniserver.podConfigurator.podInfoStore.AddCNIConfigInfo(cniInfo) } if tc.ipamDel { @@ -639,7 +638,7 @@ func TestReconcile(t *testing.T) { cniServer := newCNIServer(t) cniServer.routeClient = mockRoute gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) cniServer.podConfigurator.ifConfigurator = newTestInterfaceConfigurator() cniServer.nodeConfig = &config.NodeConfig{ Name: nodeName, diff --git a/pkg/agent/cniserver/sriov.go b/pkg/agent/cniserver/sriov.go deleted file mode 100644 index 905a99b318b..00000000000 --- a/pkg/agent/cniserver/sriov.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2021 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cniserver - -import ( - "context" - "fmt" - "net" - "path" - "time" - - current "github.com/containernetworking/cni/pkg/types/100" - "google.golang.org/grpc" - grpcinsecure "google.golang.org/grpc/credentials/insecure" - "k8s.io/klog/v2" - - // Version v1 of the Kubelet API was introduced in K8s v1.20. - // Using version v1alpha1 instead to support older K8s versions. - podresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1" - - "antrea.io/antrea/pkg/agent/util" -) - -const ( - kubeletPodResourcesPath = "/var/lib/kubelet/pod-resources" - kubeletSocket = "kubelet.sock" - connectionTimeout = 10 * time.Second -) - -type KubeletPodResources struct { - resources []*podresourcesv1alpha1.PodResources -} - -// GetPodContainerDeviceIDs returns the device IDs assigned to a Pod's containers. -func GetPodContainerDeviceIDs(podName string, podNamespace string) ([]string, error) { - ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout) - defer cancel() - - conn, err := grpc.DialContext( - ctx, - path.Join(kubeletPodResourcesPath, kubeletSocket), - grpc.WithTransportCredentials(grpcinsecure.NewCredentials()), - grpc.WithContextDialer(func(ctx context.Context, addr string) (conn net.Conn, e error) { - return util.DialLocalSocket(addr) - }), - ) - if err != nil { - return []string{}, fmt.Errorf("error getting the gRPC client for Pod resources: %v", err) - } - - defer conn.Close() - - client := podresourcesv1alpha1.NewPodResourcesListerClient(conn) - - podResources, err := client.List(ctx, &podresourcesv1alpha1.ListPodResourcesRequest{}) - if err != nil { - return []string{}, fmt.Errorf("error getting the Pod resources: %v %v", podResources, err) - } - - var podDeviceIDs []string - var kpr KubeletPodResources - kpr.resources = podResources.GetPodResources() - for _, pr := range kpr.resources { - if pr.Name == podName && pr.Namespace == podNamespace { - for _, ctr := range pr.Containers { - for _, dev := range ctr.Devices { - podDeviceIDs = append(podDeviceIDs, dev.DeviceIds...) - } - } - } - } - klog.V(2).Infof("Pod container device IDs of %s/%s are: %v", podNamespace, podName, podDeviceIDs) - return podDeviceIDs, nil -} - -// ConfigureSriovSecondaryInterface adds Secondary Interface support. -// Limitation: only SR-IOV interface is supported as of now. -func (pc *podConfigurator) ConfigureSriovSecondaryInterface( - podName string, - podNamespace string, - containerID string, - containerNetNS string, - containerIFDev string, - mtu int, - podSriovVFDeviceID string, - result *current.Result, -) error { - if podSriovVFDeviceID == "" { - return fmt.Errorf("error getting the Pod SR-IOV VF device ID") - } - - err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerIFDev, mtu, "", podSriovVFDeviceID, result, nil) - if err != nil { - return err - } - hostIface := result.Interfaces[0] - containerIface := result.Interfaces[1] - - if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { - return fmt.Errorf("failed to advertise IP address for container %s: %v", containerID, err) - } - - klog.Infof("Configured interfaces for container %s; hostIface: %+v, containerIface: %+v", containerID, hostIface, containerIface) - return nil -} diff --git a/pkg/agent/secondarynetwork/cnipodcache/cache.go b/pkg/agent/secondarynetwork/cnipodcache/cache.go index cc0ec7b6356..0eba91af76e 100644 --- a/pkg/agent/secondarynetwork/cnipodcache/cache.go +++ b/pkg/agent/secondarynetwork/cnipodcache/cache.go @@ -35,19 +35,19 @@ type CNIPodInfoCache struct { } // Add CNIPodInfo to local cache store. -func (c *CNIPodInfoCache) AddCNIConfigInfo(CNIConfig *CNIConfigInfo) { - c.cache.Add(CNIConfig) +func (c *CNIPodInfoCache) AddCNIConfigInfo(cniConfig *CNIConfigInfo) { + c.cache.Add(cniConfig) } // Delete CNIPodInfo from local cache store. -func (c *CNIPodInfoCache) DeleteCNIConfigInfo(CNIConfig *CNIConfigInfo) { - c.cache.Delete(CNIConfig) +func (c *CNIPodInfoCache) DeleteCNIConfigInfo(cniConfig *CNIConfigInfo) { + c.cache.Delete(cniConfig) } -func (c *CNIPodInfoCache) SetPodCNIDeleted(CNIConfig *CNIConfigInfo) { +func (c *CNIPodInfoCache) SetPodCNIDeleted(cniConfig *CNIConfigInfo) { c.Lock() defer c.Unlock() - CNIConfig.PodCNIDeleted = true + cniConfig.PodCNIDeleted = true } // Retrieve a valid CNI cache (PodCNIDeleted is not true) entry for the given Pod name and namespace. diff --git a/pkg/agent/secondarynetwork/cnipodcache/types.go b/pkg/agent/secondarynetwork/cnipodcache/types.go index 1231cf1ebc2..16a58c656d2 100644 --- a/pkg/agent/secondarynetwork/cnipodcache/types.go +++ b/pkg/agent/secondarynetwork/cnipodcache/types.go @@ -20,11 +20,21 @@ type CNIConfigInfo struct { PodNamespace string ContainerID string ContainerNetNS string - MTU int PodCNIDeleted bool - // Uses interface name as a key and the network/CNI config (obtained from network-attachment-definition) as value. + // Interfaces is a map that stores the secondary interface information with interface + // name to be the key. + Interfaces map[string]*InterfaceInfo +} + +type NetworkType string + +type InterfaceInfo struct { + NetworkType NetworkType + HostInterfaceName string + // OVS port UUID for a VLAN interface. + OVSPortUUID string // NOTE: Interface specific network/CNI config required to be maintained for IPAM clean-up needs. - NetworkConfig map[string][]byte + CNIConfig []byte } type CNIPodInfoStore interface { diff --git a/pkg/agent/secondarynetwork/init.go b/pkg/agent/secondarynetwork/init.go index c943f7de40b..eda73d7a824 100644 --- a/pkg/agent/secondarynetwork/init.go +++ b/pkg/agent/secondarynetwork/init.go @@ -47,10 +47,11 @@ func Initialize( podInformer cache.SharedIndexInformer, nodeName string, podCache cnipodcache.CNIPodInfoStore, - interfaceConfigurator podwatch.InterfaceConfigurator, stopCh <-chan struct{}, config *agentconfig.SecondaryNetworkConfig, ovsdb *ovsdb.OVSDB) error { - if err := createOVSBridge(config.OVSBridges, ovsdb); err != nil { + + ovsBridgeClient, err := createOVSBridge(config.OVSBridges, ovsdb) + if err != nil { return err } @@ -63,51 +64,54 @@ func Initialize( // Create podController to handle secondary network configuration for Pods with // k8s.v1.cni.cncf.io/networks Annotation defined. - podWatchController := podwatch.NewPodController( - k8sClient, - netAttachDefClient, - podInformer, - nodeName, - podCache, - interfaceConfigurator) - go podWatchController.Run(stopCh) + if podWatchController, err := podwatch.NewPodController( + k8sClient, netAttachDefClient, podInformer, + nodeName, podCache, ovsBridgeClient); err != nil { + return err + } else { + go podWatchController.Run(stopCh) + } return nil } // TODO: check and update bridge configuration. -func createOVSBridge(bridges []agentconfig.OVSBridgeConfig, ovsdb *ovsdb.OVSDB) error { +func createOVSBridge(bridges []agentconfig.OVSBridgeConfig, ovsdb *ovsdb.OVSDB) (ovsconfig.OVSBridgeClient, error) { if len(bridges) == 0 { - return nil + return nil, nil } // Only one OVS bridge is supported. bridgeConfig := bridges[0] + phyInterface := "" + if len(bridgeConfig.PhysicalInterfaces) > 0 { + phyInterface = bridgeConfig.PhysicalInterfaces[0] + if _, err := interfaceByNameFn(phyInterface); err != nil { + return nil, fmt.Errorf("failed to get interface %s: %v", phyInterface, err) + } + } + ovsBridgeClient := newOVSBridgeFn(bridgeConfig.BridgeName, ovsconfig.OVSDatapathSystem, ovsdb) if err := ovsBridgeClient.Create(); err != nil { - return fmt.Errorf("failed to create OVS bridge %s: %v", bridgeConfig.BridgeName, err) + return nil, fmt.Errorf("failed to create OVS bridge %s: %v", bridgeConfig.BridgeName, err) } klog.InfoS("OVS bridge created", "bridge", bridgeConfig.BridgeName) - if len(bridgeConfig.PhysicalInterfaces) == 0 { - return nil - } - phyInterface := bridgeConfig.PhysicalInterfaces[0] - if _, err := interfaceByNameFn(phyInterface); err != nil { - return fmt.Errorf("failed to get interface %s: %v", phyInterface, err) + if phyInterface == "" { + return ovsBridgeClient, nil } if _, err := ovsBridgeClient.GetOFPort(phyInterface, false); err == nil { klog.V(2).InfoS("Physical interface already connected to OVS bridge, skip the configuration", "device", phyInterface, "bridge", bridgeConfig.BridgeName) - return nil + return ovsBridgeClient, nil } _, err := ovsBridgeClient.CreateUplinkPort(phyInterface, 0, map[string]interface{}{interfacestore.AntreaInterfaceTypeKey: interfacestore.AntreaUplink}) if err != nil { - return fmt.Errorf("failed to create OVS uplink port %s: %v", phyInterface, err) + return nil, fmt.Errorf("failed to create OVS uplink port %s: %v", phyInterface, err) } klog.InfoS("Physical interface added to OVS bridge", "device", phyInterface, "bridge", bridgeConfig.BridgeName) - return nil + return ovsBridgeClient, nil } // CreateNetworkAttachDefClient creates net-attach-def client handle from the given config. diff --git a/pkg/agent/secondarynetwork/init_test.go b/pkg/agent/secondarynetwork/init_test.go index b9538e055dc..a9452d28412 100644 --- a/pkg/agent/secondarynetwork/init_test.go +++ b/pkg/agent/secondarynetwork/init_test.go @@ -98,9 +98,6 @@ func TestCreateOVSBridge(t *testing.T) { ovsBridges: []string{"br1"}, physicalInterfaces: []string{nonExistingInterface, "eth2"}, expectedErr: "failed to get interface", - expectedCalls: func(m *ovsconfigtest.MockOVSBridgeClient) { - m.EXPECT().Create().Return(nil) - }, }, { name: "create port error", @@ -132,11 +129,15 @@ func TestCreateOVSBridge(t *testing.T) { tc.expectedCalls(mockOVSBridgeClient) } - err := createOVSBridge(bridges, nil) + brClient, err := createOVSBridge(bridges, nil) if tc.expectedErr != "" { assert.ErrorContains(t, err, tc.expectedErr) + assert.Nil(t, brClient) } else { require.NoError(t, err) + if tc.expectedCalls != nil { + assert.NotNil(t, brClient) + } } }) } diff --git a/pkg/agent/secondarynetwork/podwatch/controller.go b/pkg/agent/secondarynetwork/podwatch/controller.go index ff1f401e838..fe052e00864 100644 --- a/pkg/agent/secondarynetwork/podwatch/controller.go +++ b/pkg/agent/secondarynetwork/podwatch/controller.go @@ -36,9 +36,10 @@ import ( netdefclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" netdefutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" - cniserver "antrea.io/antrea/pkg/agent/cniserver" + "antrea.io/antrea/pkg/agent/cniserver" cnipodcache "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" - ipam "antrea.io/antrea/pkg/agent/secondarynetwork/ipam" + "antrea.io/antrea/pkg/agent/secondarynetwork/ipam" + "antrea.io/antrea/pkg/ovs/ovsconfig" ) const ( @@ -46,6 +47,8 @@ const ( minRetryDelay = 2 * time.Second maxRetryDelay = 120 * time.Second numWorkers = 4 + // Set resyncPeriod to 0 to disable resyncing. + resyncPeriod = 0 * time.Minute ) const ( @@ -54,28 +57,21 @@ const ( defaultSecondaryInterfaceName = "eth1" startIfaceIndex = 1 endIfaceIndex = 101 -) -// Set resyncPeriod to 0 to disable resyncing. -const resyncPeriod = 0 * time.Minute + interfaceDefaultMTU = 1500 + vlanIDMax = 4094 +) var ( // ipamDelegator is used to request IP addresses for secondary network // interfaces. It can be overridden by unit tests. ipamDelegator ipam.IPAMDelegator = ipam.NewIPAMDelegator() - // getPodContainerDeviceIDs is used to retrieve SRIOV device IDs - // assigned to a specific Pod. It can be overridden by unit tests. - getPodContainerDeviceIDs = cniserver.GetPodContainerDeviceIDs ) -// Structure to associate a unique VF's PCI Address to the Linux ethernet interface. -type podSriovVFDeviceIDInfo struct { - vfDeviceID string - ifName string -} - type InterfaceConfigurator interface { - ConfigureSriovSecondaryInterface(podName string, podNamespace string, containerID string, containerNetNS string, containerIFDev string, mtu int, podSriovVFDeviceID string, result *current.Result) error + ConfigureSriovSecondaryInterface(podName, podNamespace, containerID, containerNetNS, containerInterfaceName string, mtu int, podSriovVFDeviceID string, result *current.Result) error + ConfigureVLANSecondaryInterface(podName, podNamespace, containerID, containerNetNS, containerInterfaceName string, mtu int, vlanID uint16, result *current.Result) (string, error) + DeleteVLANSecondaryInterface(containerID, hostInterfaceName, ovsPortUUID string) error } type PodController struct { @@ -86,6 +82,7 @@ type PodController struct { nodeName string podCache cnipodcache.CNIPodInfoStore interfaceConfigurator InterfaceConfigurator + ovsBridgeClient ovsconfig.OVSBridgeClient vfDeviceIDUsageMap sync.Map } @@ -95,8 +92,12 @@ func NewPodController( podInformer cache.SharedIndexInformer, nodeName string, podCache cnipodcache.CNIPodInfoStore, - interfaceConfigurator InterfaceConfigurator, -) *PodController { + ovsBridgeClient ovsconfig.OVSBridgeClient, +) (*PodController, error) { + interfaceConfigurator, err := cniserver.NewSecondaryInterfaceConfigurator(ovsBridgeClient) + if err != nil { + return nil, fmt.Errorf("failed to create SecondaryInterfaceConfigurator: %v", err) + } pc := PodController{ kubeClient: kubeClient, netAttachDefClient: netAttachDefClient, @@ -105,6 +106,7 @@ func NewPodController( nodeName: nodeName, podCache: podCache, interfaceConfigurator: interfaceConfigurator, + ovsBridgeClient: ovsBridgeClient, } podInformer.AddEventHandlerWithResyncPeriod( cache.ResourceEventHandlerFuncs{ @@ -114,75 +116,22 @@ func NewPodController( }, resyncPeriod, ) - return &pc + return &pc, nil } func podKeyGet(pod *corev1.Pod) string { return pod.Namespace + "/" + pod.Name } -// buildVFDeviceIDListPerPod is a helper function to build a cache structure with the -// list of all the PCI addresses allocated per Pod based on their resource requests (in Pod spec). -// When there is a request for a VF resource (to associate it for a secondary network interface), -// getUnusedSriovVFDeviceIDPerPod will use this cache information to pick up a unique PCI address -// which is still not associated with a network device name. -// NOTE: buildVFDeviceIDListPerPod is called only if a Pod specific VF to Interface mapping cache -// was not build earlier. Sample initial entry per Pod: "{18:01.1,""},{18:01.2,""},{18:01.3,""}" -func (pc *PodController) buildVFDeviceIDListPerPod(podName, podNamespace string) ([]podSriovVFDeviceIDInfo, error) { - podKey := podNamespace + "/" + podName - deviceCache, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) - if cacheFound { - return deviceCache.([]podSriovVFDeviceIDInfo), nil - } - podSriovVFDeviceIDs, err := getPodContainerDeviceIDs(podName, podNamespace) - if err != nil { - return nil, fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) - } - var vfDeviceIDInfoCache []podSriovVFDeviceIDInfo - for _, pciAddress := range podSriovVFDeviceIDs { - initSriovVfDeviceID := podSriovVFDeviceIDInfo{vfDeviceID: pciAddress, ifName: ""} - vfDeviceIDInfoCache = append(vfDeviceIDInfoCache, initSriovVfDeviceID) - } - pc.vfDeviceIDUsageMap.Store(podKey, vfDeviceIDInfoCache) - klog.V(2).InfoS("Pod specific SRIOV VF cache created", "Key", podKey) - return vfDeviceIDInfoCache, nil -} - -func (pc *PodController) deleteVFDeviceIDListPerPod(podName, podNamespace string) { - podKey := podNamespace + "/" + podName - _, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) - if cacheFound { - pc.vfDeviceIDUsageMap.Delete(podKey) - klog.V(2).InfoS("Pod specific SRIOV VF cache cleared", "Key", podKey) - } - return -} - -func (pc *PodController) assignUnusedSriovVFDeviceIDPerPod(podName, podNamespace, interfaceName string) (string, error) { - var cache []podSriovVFDeviceIDInfo - cache, err := pc.buildVFDeviceIDListPerPod(podName, podNamespace) - if err != nil { - return "", err - } - for idx := 0; idx < len(cache); idx++ { - if cache[idx].ifName == "" { - // Unused PCI address found. Associate PCI address to the interface. - cache[idx].ifName = interfaceName - return cache[idx].vfDeviceID, nil - } - } - return "", err -} - func generatePodSecondaryIfaceName(podCNIInfo *cnipodcache.CNIConfigInfo) (string, error) { - // Assign default interface name, if podCNIInfo.NetworkConfig is empty. - if count := len(podCNIInfo.NetworkConfig); count == 0 { + // Assign default interface name, if podCNIInfo.Interfaces is empty. + if len(podCNIInfo.Interfaces) == 0 { return defaultSecondaryInterfaceName, nil } else { // Generate new interface name (eth1,eth2..eth100) and return to caller. for ifaceIndex := startIfaceIndex; ifaceIndex < endIfaceIndex; ifaceIndex++ { ifName := fmt.Sprintf("%s%d", "eth", ifaceIndex) - _, exist := podCNIInfo.NetworkConfig[ifName] + _, exist := podCNIInfo.Interfaces[ifName] if !exist { return ifName, nil } @@ -203,22 +152,33 @@ func whereaboutsArgsBuilder(cmd string, interfaceName string, podCNIInfo *cnipod } -func removePodAllSecondaryNetwork(podCNIInfo *cnipodcache.CNIConfigInfo) error { +func (pc *PodController) deletePodSecondaryNetwork(podCNIInfo *cnipodcache.CNIConfigInfo) error { var cmdArgs *invoke.Args - // Clean-up IPAM at whereabouts db (etcd or kubernetes API server) for all the secondary networks of the Pod which is getting removed. - // PluginArgs added to provide additional arguments required for whereabouts v0.5.1 and above. - // NOTE: SR-IOV VF interface clean-up, upon Pod delete will be handled by SR-IOV device plugin. Not handled here. + // Clean-up IPAM at Whereabouts DB (etcd or Kubernetes API server) for all secondary + // networks of the Pod which is getting removed. + // NOTE: SR-IOV VF interface clean-up, upon Pod delete will be handled by SR-IOV device + // plugin. Not handled here. + // PluginArgs added to provide additional arguments required for Whereabouts v0.5.1 and + // above. cmdArgs = whereaboutsArgsBuilder("DEL", "", podCNIInfo) - // example: podCNIInfo.NetworkConfig = {"eth1": net1-cniconfig, "eth2": net2-cniconfig} - for secNetInstIface, secNetInstConfig := range podCNIInfo.NetworkConfig { + // example: podCNIInfo.Interfaces = {"eth1": net1-cniconfig, "eth2": net2-cniconfig} + for secNetInstIface, interfaceInfo := range podCNIInfo.Interfaces { + if interfaceInfo.NetworkType == vlanNetworkType { + if err := pc.interfaceConfigurator.DeleteVLANSecondaryInterface(podCNIInfo.ContainerID, + interfaceInfo.HostInterfaceName, interfaceInfo.OVSPortUUID); err != nil { + return err + } + } + cmdArgs.IfName = secNetInstIface // Do DelIPAMSubnetAddress on network config (secNetInstConfig) and command argument (updated with interface name). - err := ipamDelegator.DelIPAMSubnetAddress(secNetInstConfig, cmdArgs) + err := ipamDelegator.DelIPAMSubnetAddress(interfaceInfo.CNIConfig, cmdArgs) if err != nil { return fmt.Errorf("Failed to clean-up whereabouts IPAM %v", err) } + // Delete map entry for secNetInstIface, secNetInstConfig - delete(podCNIInfo.NetworkConfig, secNetInstIface) + delete(podCNIInfo.Interfaces, secNetInstIface) } return nil } @@ -252,6 +212,7 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { // Note: Return nil here to unqueue Pod add event. Secondary network configuration will be handled with Pod update event. return nil } + secondaryNetwork, ok := checkForPodSecondaryNetworkAttachement(pod) if !ok { // NOTE: We do not handle Pod annotation deletion/update scenario at present. @@ -265,10 +226,11 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { // Valid cache entry retrieved from cache and we received a Pod add or update event. // Avoid processing Pod annotation, if we already have at least one secondary network successfully configured on this Pod. // We do not support/handle Annotation updates yet. - if len(podCNIInfo.NetworkConfig) > 0 { + if len(podCNIInfo.Interfaces) > 0 { klog.InfoS("Secondary network already configured on this Pod and annotation update not supported, skipping update", "pod", klog.KObj(pod)) return nil } + // Parse Pod annotation and proceed with the secondary network configuration. networklist, err := netdefutils.ParseNetworkAnnotation(secondaryNetwork, pod.Namespace) if err != nil { @@ -278,9 +240,9 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { return nil } - err = pc.configureSecondaryNetwork(pod, networklist, podCNIInfo) + err = pc.configurePodSecondaryNetwork(pod, networklist, podCNIInfo) // We do not return error to retry, if at least one secondary network is configured. - if (err != nil) && (len(podCNIInfo.NetworkConfig) == 0) { + if (err != nil) && (len(podCNIInfo.Interfaces) == 0) { // Return error to requeue and retry. return err } @@ -293,16 +255,16 @@ func (pc *PodController) handleRemovePod(key string) error { // Read the CNI info (stored during Pod creation by cniserver) from cache. // Delete CNI info shared in cache for a specific Pod which is getting removed/deleted. podCNIInfo := pc.podCache.GetAllCNIConfigInfoPerPod(pod[1], pod[0]) - for _, containerInfo := range podCNIInfo { - // Release IPAM of all the secondary interfaces and delete CNI cache. - if err = removePodAllSecondaryNetwork(containerInfo); err != nil { - // Return error to requeue pod delete. + for _, info := range podCNIInfo { + // Delete all secondary interfaces and release IPAM. + if err = pc.deletePodSecondaryNetwork(info); err != nil { + // Return error to requeue Pod delete. return err } else { // Delete cache entry from podCNIInfo. - pc.podCache.DeleteCNIConfigInfo(containerInfo) + pc.podCache.DeleteCNIConfigInfo(info) // Delete Pod specific VF cache (if one exists) - pc.deleteVFDeviceIDListPerPod(containerInfo.PodName, containerInfo.PodNamespace) + pc.deleteVFDeviceIDListPerPod(info.PodName, info.PodNamespace) } } return nil @@ -340,31 +302,15 @@ func (pc *PodController) processNextWorkItem() bool { return true } -// Configure SRIOV VF as a Secondary Network Interface. -func (pc *PodController) configureSriovAsSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, containerInfo *cnipodcache.CNIConfigInfo, result *current.Result) error { - podSriovVFDeviceID, err := pc.assignUnusedSriovVFDeviceIDPerPod(pod.Name, pod.Namespace, network.InterfaceRequest) - if err != nil { - return fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) - } - - if err = pc.interfaceConfigurator.ConfigureSriovSecondaryInterface( - containerInfo.PodName, - containerInfo.PodNamespace, - containerInfo.ContainerID, - containerInfo.ContainerNetNS, - network.InterfaceRequest, - containerInfo.MTU, - podSriovVFDeviceID, - result, - ); err != nil { - return fmt.Errorf("SRIOV Interface creation failed: %v", err) - } - return nil -} - // Configure Secondary Network Interface. -func (pc *PodController) configureSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo, cniConfig []byte) error { - // Generate and assign new interface name, If secondary interface name was not provided in Pod annotation. +func (pc *PodController) configureSecondaryInterface( + pod *corev1.Pod, + network *netdefv1.NetworkSelectionElement, + podCNIInfo *cnipodcache.CNIConfigInfo, + cniConfig []byte, + networkConfig *SecondaryNetworkConfig) error { + // Generate a new interface name, if the secondary interface name was not provided in the + // Pod annotation. if len(network.InterfaceRequest) == 0 { var err error if network.InterfaceRequest, err = generatePodSecondaryIfaceName(podCNIInfo); err != nil { @@ -386,29 +332,52 @@ func (pc *PodController) configureSecondaryInterface(pod *corev1.Pod, network *n for _, ip := range result.IPs { ip.Interface = current.Int(1) } - // Configure SRIOV as a secondary network interface - if err := pc.configureSriovAsSecondaryInterface(pod, network, podCNIInfo, result); err != nil { + + var ovsPortUUID string + var ifConfigErr error + switch networkConfig.NetworkType { + case sriovNetworkType: + ifConfigErr = pc.configureSriovAsSecondaryInterface(pod, network, podCNIInfo, int(networkConfig.MTU), result) + case vlanNetworkType: + ovsPortUUID, ifConfigErr = pc.interfaceConfigurator.ConfigureVLANSecondaryInterface( + podCNIInfo.PodName, podCNIInfo.PodNamespace, + podCNIInfo.ContainerID, podCNIInfo.ContainerNetNS, network.InterfaceRequest, + int(networkConfig.MTU), uint16(networkConfig.VLAN), + result) + } + + if ifConfigErr != nil { // SRIOV interface creation failed. Free allocated IP address if err := ipamDelegator.DelIPAMSubnetAddress(cniConfig, cmdArgs); err != nil { klog.ErrorS(err, "IPAM de-allocation failed: ", err) } - return err + return ifConfigErr } // Update Pod CNI cache with the network config which was successfully configured. - if podCNIInfo.NetworkConfig == nil { - podCNIInfo.NetworkConfig = make(map[string][]byte) - } - podCNIInfo.NetworkConfig[network.InterfaceRequest] = cniConfig + if podCNIInfo.Interfaces == nil { + podCNIInfo.Interfaces = make(map[string]*cnipodcache.InterfaceInfo) + } + hostInterfaceName := "" + if len(result.Interfaces) > 0 { + // In mock tests, result.Interfaces can be nil + hostInterfaceName = result.Interfaces[0].Name + } + interfaceInfo := cnipodcache.InterfaceInfo{ + NetworkType: networkConfig.NetworkType, + HostInterfaceName: hostInterfaceName, + OVSPortUUID: ovsPortUUID, + CNIConfig: cniConfig} + podCNIInfo.Interfaces[network.InterfaceRequest] = &interfaceInfo return nil } -func (pc *PodController) configureSecondaryNetwork(pod *corev1.Pod, networklist []*netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo) error { +func (pc *PodController) configurePodSecondaryNetwork(pod *corev1.Pod, networklist []*netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo) error { for _, network := range networklist { - klog.InfoS("Secondary Network attached to Pod", "network", network, "Pod", klog.KObj(pod)) + klog.V(2).InfoS("Secondary Network attached to Pod", "network", network, "Pod", klog.KObj(pod)) netDefCRD, err := pc.netAttachDefClient.NetworkAttachmentDefinitions(network.Namespace).Get(context.TODO(), network.Name, metav1.GetOptions{}) if err != nil { // NetworkAttachmentDefinition not found at this time. Return error to re-queue and re-try. - return fmt.Errorf("NetworkAttachmentDefinition Get failed: %v", err) + return fmt.Errorf("failed to get NetworkAttachmentDefinition: %v", err) } cniConfig, err := netdefutils.GetCNIConfig(netDefCRD, "") if err != nil { @@ -425,13 +394,28 @@ func (pc *PodController) configureSecondaryNetwork(pod *corev1.Pod, networklist klog.InfoS("NetworkAttachmentDefinition is not of type 'antrea', ignoring", "NetworkAttachmentDefinition", klog.KObj(netDefCRD)) continue } - if networkConfig.NetworkType != sriovNetworkType { + if networkConfig.NetworkType != sriovNetworkType && networkConfig.NetworkType != vlanNetworkType { // same as above, if updated, we will not process the request again. - klog.ErrorS(err, "NetworkType not supported for Pod", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) + klog.ErrorS(err, "Secondary network type not supported", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) continue } + if networkConfig.NetworkType == vlanNetworkType { + if networkConfig.VLAN > vlanIDMax || networkConfig.VLAN < 0 { + klog.ErrorS(nil, "Invalid VLAN ID", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) + continue + } + } + if networkConfig.MTU < 0 { + klog.ErrorS(nil, "Invalid MTU", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) + continue + } + if networkConfig.MTU == 0 { + // TODO: use the physical interface MTU as the default. + networkConfig.MTU = interfaceDefaultMTU + } // secondary network information retrieved from API server. Proceed to configure secondary interface now. - if err = pc.configureSecondaryInterface(pod, network, podCNIInfo, cniConfig); err != nil { + if err = pc.configureSecondaryInterface(pod, network, podCNIInfo, cniConfig, &networkConfig); err != nil { + klog.ErrorS(err, "Secondary interface configuration failed", "Pod", klog.KObj(pod), "networkType", networkConfig.NetworkType) // Secondary interface configuration failed. return error to re-queue and re-try. return fmt.Errorf("secondary interface configuration failed: %v", err) } diff --git a/pkg/agent/secondarynetwork/podwatch/controller_test.go b/pkg/agent/secondarynetwork/podwatch/controller_test.go index c265dc262e9..d1e909c1b68 100644 --- a/pkg/agent/secondarynetwork/podwatch/controller_test.go +++ b/pkg/agent/secondarynetwork/podwatch/controller_test.go @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !windows +// +build !windows + package podwatch import ( + "bytes" "context" "encoding/json" + "errors" "fmt" "net" + "strings" "sync" "sync/atomic" "testing" + "text/template" "time" current "github.com/containernetworking/cni/pkg/types/100" "github.com/golang/mock/gomock" netdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" netdefclientfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" + netdefutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -46,14 +54,16 @@ const ( testNamespace = "nsA" testNode = "test-node" - // the ipam information is not actually used when testing, given that we + // The IPAM information is not actually used when testing, given that we // use a mock IPAMDelegator. But this is what the ipam information would // look like when using the actual IPAMDelegator implementation, which - // invokes the whereabouts plugin. - netAttachConfig = `{ + // invokes the Whereabouts plugin. + netAttachTemplate = `{ "cniVersion": "0.3.0", - "type": "antrea", - "networkType": "sriov", + "type": "{{.CNIType}}", + "networkType": "{{.NetworkType}}", + "mtu": {{.MTU}}, + "vlan": {{.VLAN}}, "ipam": { "type": "whereabouts", "datastore": "kubernetes", @@ -66,21 +76,37 @@ const ( defaultMTU = 1500 sriovDeviceID = "sriov-device-id" - podName = "pod1" containerID = "container1" podIP = "1.2.3.4" networkName = "net" interfaceName = "eth2" + ovsPortUUID = "12345678-e29b-41d4-a716-446655440000" ) -func testNetwork(name string) *netdefv1.NetworkAttachmentDefinition { +func testNetwork(name string, networkType cnipodcache.NetworkType) *netdefv1.NetworkAttachmentDefinition { + return testNetworkExt(name, "", networkType, 0, 0) +} + +func testNetworkExt(name, cniType string, networkType cnipodcache.NetworkType, mtu int, vlan int) *netdefv1.NetworkAttachmentDefinition { + if cniType == "" { + cniType = "antrea" + } + data := struct { + CNIType string + NetworkType string + MTU int + VLAN int + }{cniType, string(networkType), mtu, vlan} + tmpl := template.Must(template.New("test").Parse(netAttachTemplate)) + var b bytes.Buffer + tmpl.Execute(&b, &data) return &netdefv1.NetworkAttachmentDefinition{ ObjectMeta: metav1.ObjectMeta{ Name: name, }, Spec: netdefv1.NetworkAttachmentDefinitionSpec{ - Config: netAttachConfig, + Config: b.String(), }, } } @@ -127,7 +153,6 @@ func testPod(name string, container string, podIP string, networks ...netdefv1.N PodNamespace: testNamespace, ContainerID: container, ContainerNetNS: containerNetNs(container), - MTU: defaultMTU, PodCNIDeleted: false, } return pod, cniConfig @@ -145,7 +170,7 @@ func testIPAMResult(cidr string) *current.Result { } func init() { - getPodContainerDeviceIDs = func(name string, namespace string) ([]string, error) { + getPodContainerDeviceIDsFn = func(name string, namespace string) ([]string, error) { return []string{sriovDeviceID}, nil } } @@ -159,14 +184,14 @@ func TestPodControllerRun(t *testing.T) { interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) ipamDelegator = mockIPAM - podController := NewPodController( + podController, _ := NewPodController( client, netdefclient, informerFactory.Core().V1().Pods().Informer(), testNode, podCache, - interfaceConfigurator, - ) + nil) + podController.interfaceConfigurator = interfaceConfigurator stopCh := make(chan struct{}) informerFactory.Start(stopCh) @@ -183,7 +208,7 @@ func TestPodControllerRun(t *testing.T) { Name: networkName, InterfaceRequest: interfaceName, }) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) ipamResult := testIPAMResult("148.14.24.100/24") @@ -233,27 +258,187 @@ func TestPodControllerRun(t *testing.T) { wg.Wait() } -func TestPodControllerAddPod(t *testing.T) { - newPodController := func(ctrl *gomock.Controller) (*PodController, *ipamtesting.MockIPAMDelegator, *podwatchtesting.MockInterfaceConfigurator) { - client := fake.NewSimpleClientset() - netdefclient := netdefclientfake.NewSimpleClientset().K8sCniCncfIoV1() - informerFactory := informers.NewSharedInformerFactory(client, resyncPeriod) - podCache := cnipodcache.NewCNIPodInfoStore() - interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) - mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) - ipamDelegator = mockIPAM - // PodController object without event handlers - return &PodController{ - kubeClient: client, - netAttachDefClient: netdefclient, - queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(minRetryDelay, maxRetryDelay), "podcontroller"), - podInformer: informerFactory.Core().V1().Pods().Informer(), - nodeName: testNode, - podCache: podCache, - interfaceConfigurator: interfaceConfigurator, - }, mockIPAM, interfaceConfigurator +func TestConfigurePodSecondaryNetwork(t *testing.T) { + element1 := netdefv1.NetworkSelectionElement{ + Name: networkName, + Namespace: testNamespace, + InterfaceRequest: interfaceName, + } + ctrl := gomock.NewController(t) + + tests := []struct { + name string + cniType string + networkType cnipodcache.NetworkType + mtu int + vlan int + doNotCreateNetwork bool + interfaceCreated bool + expectedErr string + expectedCalls func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) + }{ + { + name: "VLAN network", + networkType: vlanNetworkType, + mtu: 1600, + vlan: 101, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1600, + uint16(101), + gomock.Any(), + ).Return(ovsPortUUID, nil) + }, + }, + { + name: "default MTU", + networkType: vlanNetworkType, + vlan: 0, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1500, + uint16(0), + gomock.Any(), + ).Return(ovsPortUUID, nil) + }, + }, + { + name: "SRIOV network", + networkType: sriovNetworkType, + mtu: 1500, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureSriovSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1500, + sriovDeviceID, + gomock.Any(), + ).Return(nil) + }, + }, + { + name: "network not found", + networkType: vlanNetworkType, + mtu: 1500, + vlan: 100, + doNotCreateNetwork: true, + expectedErr: "failed to get NetworkAttachmentDefinition:", + }, + { + name: "non-Antrea network", + cniType: "non-antrea", + networkType: vlanNetworkType, + mtu: 1500, + vlan: 100, + }, + { + name: "unsupported network", + networkType: "unsupported", + }, + { + name: "negative MTU", + networkType: sriovNetworkType, + mtu: -1, + }, + { + name: "invalid VLAN", + networkType: vlanNetworkType, + vlan: 4095, + }, + { + name: "negative VLAN", + networkType: vlanNetworkType, + vlan: -200, + }, + { + name: "IPAM failure", + networkType: sriovNetworkType, + mtu: 1500, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), errors.New("failure")) + }, + expectedErr: "secondary network IPAM failed", + }, + { + name: "interface failure", + networkType: vlanNetworkType, + mtu: 1600, + vlan: 101, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1600, + uint16(101), + gomock.Any(), + ).Return("", errors.New("interface creation failure")) + mockIPAM.EXPECT().DelIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(nil) + }, + expectedErr: "interface creation failure", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pod, cniConfigInfo := testPod(podName, containerID, podIP, element1) + savedCNIConfig := *cniConfigInfo + + pc, mockIPAM, interfaceConfigurator := testPodController(ctrl) + network1 := testNetworkExt(networkName, tc.cniType, tc.networkType, tc.mtu, tc.vlan) + if !tc.doNotCreateNetwork { + pc.netAttachDefClient.NetworkAttachmentDefinitions(testNamespace).Create(context.Background(), network1, metav1.CreateOptions{}) + } + if tc.expectedCalls != nil { + tc.expectedCalls(mockIPAM, interfaceConfigurator) + } + err := pc.configurePodSecondaryNetwork(pod, []*netdefv1.NetworkSelectionElement{&element1}, cniConfigInfo) + if tc.expectedErr == "" { + assert.Nil(t, err) + } else { + assert.True(t, strings.Contains(err.Error(), tc.expectedErr)) + } + + if tc.interfaceCreated { + config1, _ := netdefutils.GetCNIConfig(network1, "") + info := cnipodcache.InterfaceInfo{ + NetworkType: tc.networkType, + CNIConfig: config1, + } + if tc.networkType == vlanNetworkType { + info.OVSPortUUID = ovsPortUUID + } + savedCNIConfig.Interfaces = map[string]*cnipodcache.InterfaceInfo{interfaceName: &info} + } + assert.Equal(t, &savedCNIConfig, cniConfigInfo) + }) } +} + +func TestPodControllerAddPod(t *testing.T) { pod, cniConfig := testPod(podName, containerID, podIP, netdefv1.NetworkSelectionElement{ Name: networkName, InterfaceRequest: interfaceName, @@ -261,7 +446,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing network", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -270,7 +455,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("multiple network interfaces", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) pod, cniConfig := testPod( podName, @@ -285,8 +470,9 @@ func TestPodControllerAddPod(t *testing.T) { InterfaceRequest: "eth11", }, ) - network1 := testNetwork("net1") - network2 := testNetwork("net2") + savedCNIConfig := *cniConfig + network1 := testNetwork("net1", sriovNetworkType) + network2 := testNetworkExt("net2", "", vlanNetworkType, defaultMTU, 100) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -294,20 +480,20 @@ func TestPodControllerAddPod(t *testing.T) { containerID, containerNetNs(containerID), "eth10", - defaultMTU, + interfaceDefaultMTU, gomock.Any(), gomock.Any(), ) - interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( + interfaceConfigurator.EXPECT().ConfigureVLANSecondaryInterface( podName, testNamespace, containerID, containerNetNs(containerID), "eth11", defaultMTU, + uint16(100), gomock.Any(), - gomock.Any(), - ) + ).Return(ovsPortUUID, nil) mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.101/24"), nil) @@ -320,11 +506,37 @@ func TestPodControllerAddPod(t *testing.T) { _, err = podController.netAttachDefClient.NetworkAttachmentDefinitions(testNamespace).Create(context.Background(), network2, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test NetworkAttachmentDefinition") assert.NoError(t, podController.handleAddUpdatePod(pod)) + + infos := podController.podCache.GetAllCNIConfigInfoPerPod(podName, testNamespace) + assert.Equal(t, 1, len(infos)) + config1, _ := netdefutils.GetCNIConfig(network1, "") + config2, _ := netdefutils.GetCNIConfig(network2, "") + savedCNIConfig.Interfaces = map[string]*cnipodcache.InterfaceInfo{ + "eth10": { + NetworkType: sriovNetworkType, + CNIConfig: config1, + }, + "eth11": { + OVSPortUUID: ovsPortUUID, + NetworkType: vlanNetworkType, + CNIConfig: config2, + }, + } + assert.Equal(t, &savedCNIConfig, infos[0]) + + mockIPAM.EXPECT().DelIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(nil).Times(2) + interfaceConfigurator.EXPECT().DeleteVLANSecondaryInterface( + containerID, + gomock.Any(), + ovsPortUUID).Return(nil) + assert.NoError(t, podController.handleRemovePod(testNamespace+"/"+podName)) + infos = podController.podCache.GetAllCNIConfigInfoPerPod(podName, testNamespace) + assert.Equal(t, 0, len(infos)) }) t.Run("no network interfaces", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, podIP) @@ -336,9 +548,9 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing podcache entry", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -349,10 +561,10 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing Status.PodIPs", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, "") - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) @@ -364,18 +576,11 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("different Namespace for Pod and NetworkAttachmentDefinition", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) networkNamespace := "nsB" - network := &netdefv1.NetworkAttachmentDefinition{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: networkNamespace, - Name: networkName, - }, - Spec: netdefv1.NetworkAttachmentDefinitionSpec{ - Config: netAttachConfig, - }, - } + network := testNetwork(networkName, sriovNetworkType) + network.Namespace = networkNamespace pod, cniConfig := testPod(podName, containerID, podIP, netdefv1.NetworkSelectionElement{ Namespace: networkNamespace, @@ -405,7 +610,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("no interface name", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) pod, cniConfig := testPod( podName, @@ -420,7 +625,7 @@ func TestPodControllerAddPod(t *testing.T) { InterfaceRequest: "", }, ) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -456,9 +661,9 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("error when creating interface", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -484,13 +689,13 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("invalid networks annotation", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, podIP) pod.Annotations = map[string]string{ networkAttachDefAnnotationKey: "", } - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) @@ -503,8 +708,8 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("Error when adding VF deviceID cache per Pod", func(t *testing.T) { ctrl := gomock.NewController(t) - network := testNetwork(networkName) - podController, _, _ := newPodController(ctrl) + network := testNetwork(networkName, sriovNetworkType) + podController, _, _ := testPodController(ctrl) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -519,3 +724,23 @@ func TestPodControllerAddPod(t *testing.T) { }) } + +func testPodController(ctrl *gomock.Controller) (*PodController, *ipamtesting.MockIPAMDelegator, *podwatchtesting.MockInterfaceConfigurator) { + client := fake.NewSimpleClientset() + netdefclient := netdefclientfake.NewSimpleClientset().K8sCniCncfIoV1() + informerFactory := informers.NewSharedInformerFactory(client, resyncPeriod) + podCache := cnipodcache.NewCNIPodInfoStore() + interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) + mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) + ipamDelegator = mockIPAM + // PodController object without event handlers + return &PodController{ + kubeClient: client, + netAttachDefClient: netdefclient, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(minRetryDelay, maxRetryDelay), "podcontroller"), + podInformer: informerFactory.Core().V1().Pods().Informer(), + nodeName: testNode, + podCache: podCache, + interfaceConfigurator: interfaceConfigurator, + }, mockIPAM, interfaceConfigurator +} diff --git a/pkg/agent/secondarynetwork/podwatch/sriov.go b/pkg/agent/secondarynetwork/podwatch/sriov.go new file mode 100644 index 00000000000..adbb613a228 --- /dev/null +++ b/pkg/agent/secondarynetwork/podwatch/sriov.go @@ -0,0 +1,173 @@ +// Copyright 2023 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podwatch + +import ( + "context" + "fmt" + "net" + "path" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + // Version v1 of the kubelet API was introduced in K8s v1.20. + // Using version v1alpha1 instead to support older K8s versions. + current "github.com/containernetworking/cni/pkg/types/100" + netdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "google.golang.org/grpc" + grpcinsecure "google.golang.org/grpc/credentials/insecure" + podresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1" + + cnipodcache "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" + "antrea.io/antrea/pkg/agent/util" +) + +const ( + kubeletPodResourcesPath = "/var/lib/kubelet/pod-resources" + kubeletSocket = "kubelet.sock" + connectionTimeout = 10 * time.Second +) + +var ( + // getPodContainerDeviceIDsFn is used to retrieve SRIOV device IDs + // assigned to a specific Pod. It can be overridden by unit tests. + getPodContainerDeviceIDsFn = getPodContainerDeviceIDs +) + +type KubeletPodResources struct { + resources []*podresourcesv1alpha1.PodResources +} + +// Structure to associate a unique VF's PCI Address to the Linux ethernet interface. +type podSriovVFDeviceIDInfo struct { + vfDeviceID string + ifName string +} + +// getPodContainerDeviceIDs returns the device IDs assigned to a Pod's containers. +func getPodContainerDeviceIDs(podName string, podNamespace string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout) + defer cancel() + + conn, err := grpc.DialContext( + ctx, + path.Join(kubeletPodResourcesPath, kubeletSocket), + grpc.WithTransportCredentials(grpcinsecure.NewCredentials()), + grpc.WithContextDialer(func(ctx context.Context, addr string) (conn net.Conn, e error) { + return util.DialLocalSocket(addr) + }), + ) + if err != nil { + return []string{}, fmt.Errorf("error getting the gRPC client for Pod resources: %v", err) + } + + defer conn.Close() + + client := podresourcesv1alpha1.NewPodResourcesListerClient(conn) + if client == nil { + return []string{}, fmt.Errorf("error getting the lister client for Pod resources") + } + + podResources, err := client.List(ctx, &podresourcesv1alpha1.ListPodResourcesRequest{}) + if err != nil { + return []string{}, fmt.Errorf("error getting the Pod resources: %v %v", podResources, err) + } + + var podDeviceIDs []string + var kpr KubeletPodResources + kpr.resources = podResources.GetPodResources() + for _, pr := range kpr.resources { + if pr.Name == podName && pr.Namespace == podNamespace { + for _, ctr := range pr.Containers { + for _, dev := range ctr.Devices { + podDeviceIDs = append(podDeviceIDs, dev.DeviceIds...) + } + } + } + } + klog.V(2).Infof("Pod container device IDs of %s/%s are: %v", podNamespace, podName, podDeviceIDs) + return podDeviceIDs, nil +} + +// buildVFDeviceIDListPerPod is a helper function to build a cache structure with the +// list of all the PCI addresses allocated per Pod based on their resource requests (in Pod spec). +// When there is a request for a VF resource (to associate it for a secondary network interface), +// getUnusedSriovVFDeviceIDPerPod will use this cache information to pick up a unique PCI address +// which is still not associated with a network device name. +// NOTE: buildVFDeviceIDListPerPod is called only if a Pod specific VF to Interface mapping cache +// was not build earlier. Sample initial entry per Pod: "{18:01.1,""},{18:01.2,""},{18:01.3,""}" +func (pc *PodController) buildVFDeviceIDListPerPod(podName, podNamespace string) ([]podSriovVFDeviceIDInfo, error) { + podKey := podNamespace + "/" + podName + deviceCache, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) + if cacheFound { + return deviceCache.([]podSriovVFDeviceIDInfo), nil + } + podSriovVFDeviceIDs, err := getPodContainerDeviceIDsFn(podName, podNamespace) + if err != nil { + return nil, fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) + } + var vfDeviceIDInfoCache []podSriovVFDeviceIDInfo + for _, pciAddress := range podSriovVFDeviceIDs { + initSriovVfDeviceID := podSriovVFDeviceIDInfo{vfDeviceID: pciAddress, ifName: ""} + vfDeviceIDInfoCache = append(vfDeviceIDInfoCache, initSriovVfDeviceID) + } + pc.vfDeviceIDUsageMap.Store(podKey, vfDeviceIDInfoCache) + klog.V(2).InfoS("Pod specific SRIOV VF cache created", "Key", podKey) + return vfDeviceIDInfoCache, nil +} + +func (pc *PodController) deleteVFDeviceIDListPerPod(podName, podNamespace string) { + podKey := podNamespace + "/" + podName + _, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) + if cacheFound { + pc.vfDeviceIDUsageMap.Delete(podKey) + klog.V(2).InfoS("Pod specific SRIOV VF cache cleared", "Key", podKey) + } + return +} + +func (pc *PodController) assignUnusedSriovVFDeviceIDPerPod(podName, podNamespace, interfaceName string) (string, error) { + var cache []podSriovVFDeviceIDInfo + cache, err := pc.buildVFDeviceIDListPerPod(podName, podNamespace) + if err != nil { + return "", err + } + for idx := 0; idx < len(cache); idx++ { + if cache[idx].ifName == "" { + // Unused PCI address found. Associate PCI address to the interface. + cache[idx].ifName = interfaceName + return cache[idx].vfDeviceID, nil + } + } + return "", err +} + +// Configure SRIOV VF as a Secondary Network Interface. +func (pc *PodController) configureSriovAsSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, containerInfo *cnipodcache.CNIConfigInfo, mtu int, result *current.Result) error { + podSriovVFDeviceID, err := pc.assignUnusedSriovVFDeviceIDPerPod(pod.Name, pod.Namespace, network.InterfaceRequest) + if err != nil { + return err + } + + if err = pc.interfaceConfigurator.ConfigureSriovSecondaryInterface( + containerInfo.PodName, containerInfo.PodNamespace, containerInfo.ContainerID, + containerInfo.ContainerNetNS, network.InterfaceRequest, + mtu, podSriovVFDeviceID, result); err != nil { + return fmt.Errorf("SRIOV Interface creation failed: %v", err) + } + return nil +} diff --git a/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go b/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go index bd25546f7e7..296a9cedca8 100644 --- a/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go +++ b/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go @@ -1,4 +1,4 @@ -// Copyright 2022 Antrea Authors +// Copyright 2023 Antrea Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,3 +61,32 @@ func (mr *MockInterfaceConfiguratorMockRecorder) ConfigureSriovSecondaryInterfac mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureSriovSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).ConfigureSriovSecondaryInterface), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) } + +// ConfigureVLANSecondaryInterface mocks base method +func (m *MockInterfaceConfigurator) ConfigureVLANSecondaryInterface(arg0, arg1, arg2, arg3, arg4 string, arg5 int, arg6 uint16, arg7 *types100.Result) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ConfigureVLANSecondaryInterface", arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ConfigureVLANSecondaryInterface indicates an expected call of ConfigureVLANSecondaryInterface +func (mr *MockInterfaceConfiguratorMockRecorder) ConfigureVLANSecondaryInterface(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureVLANSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).ConfigureVLANSecondaryInterface), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) +} + +// DeleteVLANSecondaryInterface mocks base method +func (m *MockInterfaceConfigurator) DeleteVLANSecondaryInterface(arg0, arg1, arg2 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteVLANSecondaryInterface", arg0, arg1, arg2) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteVLANSecondaryInterface indicates an expected call of DeleteVLANSecondaryInterface +func (mr *MockInterfaceConfiguratorMockRecorder) DeleteVLANSecondaryInterface(arg0, arg1, arg2 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteVLANSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).DeleteVLANSecondaryInterface), arg0, arg1, arg2) +} diff --git a/pkg/agent/secondarynetwork/podwatch/types.go b/pkg/agent/secondarynetwork/podwatch/types.go index f52f12239fa..b216315d9a8 100644 --- a/pkg/agent/secondarynetwork/podwatch/types.go +++ b/pkg/agent/secondarynetwork/podwatch/types.go @@ -14,6 +14,8 @@ package podwatch +import "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" + type RouteInfo struct { Dst string `json:"dst,omitempty"` } @@ -28,7 +30,8 @@ type IPAMConfig struct { } const ( - sriovNetworkType = "sriov" + sriovNetworkType cnipodcache.NetworkType = "sriov" + vlanNetworkType cnipodcache.NetworkType = "vlan" ) type SecondaryNetworkConfig struct { @@ -37,6 +40,9 @@ type SecondaryNetworkConfig struct { // Set type to "antrea" Type string `json:"type,omitempty"` // Set networkType to "sriov" - NetworkType string `json:"networkType,omitempty"` - IPAM IPAMConfig `json:"ipam,omitempty"` + NetworkType cnipodcache.NetworkType `json:"networkType,omitempty"` + + MTU int32 `json:"mtu,omitempty"` + VLAN int32 `json:"vlan,omitempty"` + IPAM IPAMConfig `json:"ipam,omitempty"` } diff --git a/pkg/agent/util/net.go b/pkg/agent/util/net.go index 7e08780c104..4e4c0d20d35 100644 --- a/pkg/agent/util/net.go +++ b/pkg/agent/util/net.go @@ -90,9 +90,10 @@ func GenerateNodeTunnelInterfaceKey(nodeName string) string { } // GenerateContainerInterfaceName generates a unique interface name using the -// Pod's namespace, name and containerID. The output should be deterministic (so that -// multiple calls to GenerateContainerInterfaceName with the same parameters -// return the same value). The output has the length of interfaceNameLength(15). +// Pod's Namespace, name and container ID. The output should be deterministic +// (so that multiple calls to GenerateContainerInterfaceName with the same +// parameters return the same value). The output has the length of +// interfaceNameLength(15). // The probability of collision should be neglectable. func GenerateContainerInterfaceName(podName, podNamespace, containerID string) string { // Use the podName as the prefix and the containerID as the hashing key. @@ -100,6 +101,20 @@ func GenerateContainerInterfaceName(podName, podNamespace, containerID string) s return generateInterfaceName(containerID, podName, true) } +// GenerateContainerOuterVethName generates a unique interface name using the +// Pod's Name, container ID, and the container veth interface name. The output +// should be deterministic. +func GenerateContainerHostVethName(podName, podNamespace, containerID, containerVeth string) string { + var key string + if containerVeth == "eth0" { + key = containerID + } else { + // Secondary interface. + key = containerID + containerVeth + } + return generateInterfaceName(key, podName, true) +} + // GenerateNodeTunnelInterfaceName generates a unique interface name for the // tunnel to the Node, using the Node's name. func GenerateNodeTunnelInterfaceName(nodeName string) string { diff --git a/pkg/agent/util/net_test.go b/pkg/agent/util/net_test.go index 42bc4187e01..c6a19a8e5e0 100644 --- a/pkg/agent/util/net_test.go +++ b/pkg/agent/util/net_test.go @@ -70,6 +70,97 @@ func TestGenerateContainerInterfaceName(t *testing.T) { } } +func TestGenerateContainerHostVethName(t *testing.T) { + podName0 := "pod0" + podNamespace0 := "ns0" + containerID0 := "container0" + eth0 := "eth0" + ifaceName0 := GenerateContainerHostVethName(podName0, podNamespace0, containerID0, eth0) + assert.True(t, len(ifaceName0) <= interfaceNameLength) + assert.True(t, strings.HasPrefix(ifaceName0, podName0+"-")) + + tests := []struct { + name string + podName string + podNS string + containerID string + innerName string + namePrefix string + equalToIface0 bool + }{ + { + name: "should equal iface0", + podName: podName0, + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: podName0 + "-", + equalToIface0: true, + }, + { + name: "eth1", + podName: podName0, + podNS: podNamespace0, + containerID: containerID0, + innerName: "eth1", + namePrefix: podName0 + "-", + }, + { + name: "pod1", + podName: "pod1", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod1-", + }, + { + name: "pod0 and different container ID", + podName: podName0, + podNS: podNamespace0, + containerID: "container1", + innerName: eth0, + namePrefix: podName0 + "-", + }, + { + name: "pod0 of ns1", + podName: podName0, + podNS: "ns1", + containerID: containerID0, + innerName: "eth0", + namePrefix: podName0 + "-", + equalToIface0: true, + }, + { + name: "8-char Pod name", + podName: "pod12345", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod12345" + "-", + }, + { + name: "6-char Pod name", + podName: "pod123456", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod12345" + "-", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ifaceName := GenerateContainerHostVethName(tc.podName, tc.podNS, tc.containerID, tc.innerName) + assert.True(t, len(ifaceName) <= interfaceNameLength) + assert.True(t, strings.HasPrefix(ifaceName, tc.namePrefix)) + if tc.equalToIface0 { + assert.Equal(t, ifaceName, ifaceName0) + } else { + assert.NotEqual(t, ifaceName, ifaceName0) + } + }) + } +} + func TestGetIPNetDeviceFromIP(t *testing.T) { testNetInterfaces := generateNetInterfaces() tests := []struct {