From de71361827d4e91b2b647884ba4666d086077cc5 Mon Sep 17 00:00:00 2001 From: Quan Tian Date: Thu, 11 Jun 2020 22:56:03 +0800 Subject: [PATCH] Fix IP not released after restarting antrea-agent There was no sync-up between the IPAM cache of antrea-agent and the IPAM plugin, the IPAM DEL could be skipped incorrectly when deleting a Pod created before antrea-agent was restarted, leading to IP address leak. The handling of multiple CNI DELs is idempotent. There is no need to check in-memory cache before calling IPAM DEL. This patches fixes the issue by removing the cache check and adds a verification in e2e test. --- pkg/agent/cniserver/ipam/ipam_service.go | 4 --- test/e2e/basic_test.go | 34 +++++++++++++++++++++--- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pkg/agent/cniserver/ipam/ipam_service.go b/pkg/agent/cniserver/ipam/ipam_service.go index db3e21bb13d..1198a49002a 100644 --- a/pkg/agent/cniserver/ipam/ipam_service.go +++ b/pkg/agent/cniserver/ipam/ipam_service.go @@ -82,10 +82,6 @@ func ExecIPAMAdd(cniArgs *cnipb.CniCmdArgs, ipamType string, resultKey string) ( } func ExecIPAMDelete(cniArgs *cnipb.CniCmdArgs, ipamType string, resultKey string) error { - _, ok := GetIPFromCache(resultKey) - if !ok { - return nil - } args := argsFromEnv(cniArgs) driver := ipamDrivers[ipamType] err := driver.Del(args, cniArgs.NetworkConfiguration) diff --git a/test/e2e/basic_test.go b/test/e2e/basic_test.go index 78f1bb5250b..b1d6ce5d7b9 100755 --- a/test/e2e/basic_test.go +++ b/test/e2e/basic_test.go @@ -26,9 +26,9 @@ import ( "k8s.io/apimachinery/pkg/util/wait" + "github.com/vmware-tanzu/antrea/pkg/agent/apiserver/handlers/podinterface" "github.com/vmware-tanzu/antrea/pkg/agent/config" "github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie" - "github.com/vmware-tanzu/antrea/pkg/agent/util" ) // TestDeploy is a "no-op" test that simply performs setup and teardown. @@ -75,9 +75,6 @@ func TestPodAssignIP(t *testing.T) { } func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName string) { - ifName := util.GenerateContainerInterfaceName(podName, testNamespace) - t.Logf("Host interface name for Pod is '%s'", ifName) - var antreaPodName string var err error if antreaPodName, err = data.getAntreaPodOnNode(nodeName); err != nil { @@ -85,6 +82,19 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin } t.Logf("The Antrea Pod for Node '%s' is '%s'", nodeName, antreaPodName) + cmds := []string{"antctl", "get", "podinterface", podName, "-n", testNamespace, "-o", "json"} + stdout, _, err := runAntctl(antreaPodName, cmds, data, t) + var podInterfaces []podinterface.Response + if err := json.Unmarshal([]byte(stdout), &podInterfaces); err != nil { + t.Fatalf("Error when querying the pod interface: %v", err) + } + if len(podInterfaces) != 1 { + t.Fatalf("Expected 1 pod interface, got %d", len(podInterfaces)) + } + ifName := podInterfaces[0].InterfaceName + podIP := podInterfaces[0].IP + t.Logf("Host interface name for Pod is '%s'", ifName) + doesInterfaceExist := func() bool { cmd := fmt.Sprintf("ip link show %s", ifName) if rc, _, _, err := RunCommandOnNode(nodeName, cmd); err != nil { @@ -103,6 +113,16 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin return exists } + doesIPAllocationExist := func() bool { + cmd := fmt.Sprintf("test -f /var/run/antrea/cni/networks/antrea/%s", podIP) + if rc, _, _, err := RunCommandOnNode(nodeName, cmd); err != nil { + t.Fatalf("Error when running ip command on Node '%s': %v", nodeName, err) + } else { + return rc == 0 + } + return false + } + t.Logf("Checking that the veth interface and the OVS port exist") if !doesInterfaceExist() { t.Errorf("Interface '%s' does not exist on Node '%s'", ifName, nodeName) @@ -110,6 +130,9 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin if !doesOVSPortExist() { t.Errorf("OVS port '%s' does not exist on Node '%s'", ifName, nodeName) } + if !doesIPAllocationExist() { + t.Errorf("IP allocation '%s' does not exist on Node '%s'", podIP, nodeName) + } t.Logf("Deleting Pod '%s'", podName) if err := data.deletePodAndWait(defaultTimeout, podName); err != nil { @@ -123,6 +146,9 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin if doesOVSPortExist() { t.Errorf("OVS port '%s' still exists on Node '%s' after Pod deletion", ifName, nodeName) } + if doesIPAllocationExist() { + t.Errorf("IP allocation '%s' still exists on Node '%s'", podIP, nodeName) + } } // TestDeletePod creates a Pod, then deletes it, and checks that the veth interface (in the Node