Skip to content

Commit

Permalink
Fix IP not released after restarting antrea-agent
Browse files Browse the repository at this point in the history
There was no sync-up between the IPAM cache of antrea-agent and the IPAM
plugin, the IPAM DEL could be skipped incorrectly when deleting a Pod
created before antrea-agent was restarted, leading to IP address leak.

The handling of multiple CNI DELs is idempotent. There is no need to
check in-memory cache before calling IPAM DEL.

This patches fixes the issue by removing the cache check and adds a
verification in e2e test.
  • Loading branch information
tnqn committed Jun 11, 2020
1 parent 3b7566f commit de71361
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 8 deletions.
4 changes: 0 additions & 4 deletions pkg/agent/cniserver/ipam/ipam_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@ func ExecIPAMAdd(cniArgs *cnipb.CniCmdArgs, ipamType string, resultKey string) (
}

func ExecIPAMDelete(cniArgs *cnipb.CniCmdArgs, ipamType string, resultKey string) error {
_, ok := GetIPFromCache(resultKey)
if !ok {
return nil
}
args := argsFromEnv(cniArgs)
driver := ipamDrivers[ipamType]
err := driver.Del(args, cniArgs.NetworkConfiguration)
Expand Down
34 changes: 30 additions & 4 deletions test/e2e/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import (

"k8s.io/apimachinery/pkg/util/wait"

"github.com/vmware-tanzu/antrea/pkg/agent/apiserver/handlers/podinterface"
"github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie"
"github.com/vmware-tanzu/antrea/pkg/agent/util"
)

// TestDeploy is a "no-op" test that simply performs setup and teardown.
Expand Down Expand Up @@ -75,16 +75,26 @@ func TestPodAssignIP(t *testing.T) {
}

func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName string) {
ifName := util.GenerateContainerInterfaceName(podName, testNamespace)
t.Logf("Host interface name for Pod is '%s'", ifName)

var antreaPodName string
var err error
if antreaPodName, err = data.getAntreaPodOnNode(nodeName); err != nil {
t.Fatalf("Error when retrieving the name of the Antrea Pod running on Node '%s': %v", nodeName, err)
}
t.Logf("The Antrea Pod for Node '%s' is '%s'", nodeName, antreaPodName)

cmds := []string{"antctl", "get", "podinterface", podName, "-n", testNamespace, "-o", "json"}
stdout, _, err := runAntctl(antreaPodName, cmds, data, t)
var podInterfaces []podinterface.Response
if err := json.Unmarshal([]byte(stdout), &podInterfaces); err != nil {
t.Fatalf("Error when querying the pod interface: %v", err)
}
if len(podInterfaces) != 1 {
t.Fatalf("Expected 1 pod interface, got %d", len(podInterfaces))
}
ifName := podInterfaces[0].InterfaceName
podIP := podInterfaces[0].IP
t.Logf("Host interface name for Pod is '%s'", ifName)

doesInterfaceExist := func() bool {
cmd := fmt.Sprintf("ip link show %s", ifName)
if rc, _, _, err := RunCommandOnNode(nodeName, cmd); err != nil {
Expand All @@ -103,13 +113,26 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin
return exists
}

doesIPAllocationExist := func() bool {
cmd := fmt.Sprintf("test -f /var/run/antrea/cni/networks/antrea/%s", podIP)
if rc, _, _, err := RunCommandOnNode(nodeName, cmd); err != nil {
t.Fatalf("Error when running ip command on Node '%s': %v", nodeName, err)
} else {
return rc == 0
}
return false
}

t.Logf("Checking that the veth interface and the OVS port exist")
if !doesInterfaceExist() {
t.Errorf("Interface '%s' does not exist on Node '%s'", ifName, nodeName)
}
if !doesOVSPortExist() {
t.Errorf("OVS port '%s' does not exist on Node '%s'", ifName, nodeName)
}
if !doesIPAllocationExist() {
t.Errorf("IP allocation '%s' does not exist on Node '%s'", podIP, nodeName)
}

t.Logf("Deleting Pod '%s'", podName)
if err := data.deletePodAndWait(defaultTimeout, podName); err != nil {
Expand All @@ -123,6 +146,9 @@ func (data *TestData) testDeletePod(t *testing.T, podName string, nodeName strin
if doesOVSPortExist() {
t.Errorf("OVS port '%s' still exists on Node '%s' after Pod deletion", ifName, nodeName)
}
if doesIPAllocationExist() {
t.Errorf("IP allocation '%s' still exists on Node '%s'", podIP, nodeName)
}
}

// TestDeletePod creates a Pod, then deletes it, and checks that the veth interface (in the Node
Expand Down

0 comments on commit de71361

Please sign in to comment.