Skip to content

Commit

Permalink
Do not delete IPv6 link-local route in reconciler
Browse files Browse the repository at this point in the history
In the existing code, the IPv6 link-local route on antrea-gw0 is deleted in
route reconcile, which results in the IPv6 Neighbor Solicitation sent from Pod's
link-local address is dropped on the Node by kenel reverse path filtering, and
Pod would mark the antrea-gw0 as a "FAILED" neighbor. Then the Pod's accross
Node traffic or the Pod-to-external traffic does not work as expected.

This change includes,
1. Do not delete IPv6 link-local routes in the reconcile function,
2. Restore IPv6 link-local route in syncRoute function.

Signed-off-by: wenyingd <wenyingd@vmware.com>
  • Loading branch information
wenyingd committed Sep 15, 2023
1 parent 99a5f25 commit 8c037f6
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 13 deletions.
30 changes: 27 additions & 3 deletions pkg/agent/route/route_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,21 @@ func (c *Client) syncRoute() error {
return err
}
routeMap := make(map[string]*netlink.Route)
routeKey := func(r *netlink.Route) string {
// Use "$Dst_$linkIndex" as the key to avoid the cases that multiple route entries
// are configuring with the same destination but on different net interfaces,
// e.g., one IPv6 link-local route exists per net interface.
return fmt.Sprintf("%s_%d", r.Dst.String(), r.LinkIndex)
}
for i := range routeList {
r := &routeList[i]
if r.Dst == nil {
continue
}
routeMap[r.Dst.String()] = r
routeMap[routeKey(r)] = r
}
restoreRoute := func(route *netlink.Route) bool {
r, ok := routeMap[route.Dst.String()]
r, ok := routeMap[routeKey(route)]
if ok && routeEqual(route, r) {
return true
}
Expand Down Expand Up @@ -277,12 +283,24 @@ func (c *Client) syncRoute() error {
})
}
if c.nodeConfig.PodIPv6CIDR != nil {
// The system auto-generated IPv6 link-local route always uses "fe80::/64"
// as the destination no matter of the interface's global address's mask.
// Here we assume the IPv6 link-local address always exists on antrea-gw0
// to avoid unexpected issues in the IPv6 forwarding.
_, llrCIDR, _ := net.ParseCIDR("fe80::/64")
gwAutoconfRoutes = append(gwAutoconfRoutes, &netlink.Route{
LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex,
Dst: c.nodeConfig.PodIPv6CIDR,
Src: c.nodeConfig.GatewayConfig.IPv6,
Scope: netlink.SCOPE_LINK,
})
},
// Restore the IPv6 link-local route.
&netlink.Route{
LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex,
Dst: llrCIDR,
Scope: netlink.SCOPE_LINK,
},
)
}
for _, route := range gwAutoconfRoutes {
restoreRoute(route)
Expand Down Expand Up @@ -891,6 +909,12 @@ func (c *Client) Reconcile(podCIDRs []string) error {
if desiredPodCIDRs.Has(route.Dst.String()) {
continue
}
// A route destined to an IPv6 link-local CIDR is always system auto-generated along with a link-local
// address, which is not configured by antrea and is supposed to be ignored in the "deletion" list.
// Such routes are helpful in some case, e.g., IPv6 NDP.
if route.Dst.IP.IsLinkLocalUnicast() && route.Dst.IP.To4() == nil {
continue
}
// IPv6 doesn't support "on-link" route, routes to the peer IPv6 gateways need to
// be added separately. So don't delete such routes.
if desiredIPv6GWs.Has(route.Dst.IP.String()) {
Expand Down
6 changes: 6 additions & 0 deletions pkg/agent/route/route_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ func TestSyncRoutes(t *testing.T) {
Src: net.ParseIP("aabb:ccdd::1"),
Scope: netlink.SCOPE_LINK,
})
mockNetlink.EXPECT().RouteReplace(&netlink.Route{
LinkIndex: 10,
Dst: ip.MustParseCIDR("fe80::/64"),
Scope: netlink.SCOPE_LINK,
})

c := &Client{
netlink: mockNetlink,
Expand Down Expand Up @@ -676,6 +681,7 @@ func TestReconcile(t *testing.T) {
{Dst: ip.MustParseCIDR("2001:ab03:cd04:55ee:1001::1/128")}, // existing podCIDR, should not be deleted.
{Dst: ip.MustParseCIDR("fc01::aabb:ccdd:eeff/128")}, // service route, should not be deleted.
{Dst: ip.MustParseCIDR("2001:ab03:cd04:55ee:100b::/80")}, // non-existing podCIDR, should be deleted.
{Dst: ip.MustParseCIDR("fe80::/80")}, // link-local route, should not be deleted.
}, nil)
mockNetlink.EXPECT().RouteDel(&netlink.Route{Dst: ip.MustParseCIDR("192.168.11.0/24")})
mockNetlink.EXPECT().RouteDel(&netlink.Route{Dst: ip.MustParseCIDR("2001:ab03:cd04:55ee:100b::/80")})
Expand Down
33 changes: 23 additions & 10 deletions test/e2e/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ func testReconcileGatewayRoutesOnStartup(t *testing.T, data *TestData, isIPv6 bo
t.Logf("Retrieving gateway routes on Node '%s'", nodeName)
var routes []Route
if err := wait.PollImmediate(defaultInterval, defaultTimeout, func() (found bool, err error) {
routes, _, err = getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
var llRoute *Route
routes, _, llRoute, err = getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
if err != nil {
return false, err
}
Expand All @@ -391,6 +392,9 @@ func testReconcileGatewayRoutesOnStartup(t *testing.T, data *TestData, isIPv6 bo
} else if len(routes) > expectedRtNumMax {
return false, fmt.Errorf("found too many gateway routes, expected %d but got %d", expectedRtNumMax, len(routes))
}
if isIPv6 && llRoute == nil {
return false, fmt.Errorf("IPv6 link-local route not found")
}
return true, nil
}); err == wait.ErrWaitTimeout {
t.Fatalf("Not enough gateway routes after %v", defaultTimeout)
Expand All @@ -410,8 +414,8 @@ func testReconcileGatewayRoutesOnStartup(t *testing.T, data *TestData, isIPv6 bo
_, routeToAdd.routeCIDR, _ = net.ParseCIDR("99.99.99.0/24")
routeToAdd.routeGW = net.ParseIP("99.99.99.1")
} else {
_, routeToAdd.routeCIDR, _ = net.ParseCIDR("fe80::0/112")
routeToAdd.routeGW = net.ParseIP("fe80::1")
_, routeToAdd.routeCIDR, _ = net.ParseCIDR("fa80::/112")
routeToAdd.routeGW = net.ParseIP("fa80::1")
}

// We run the ip command from the antrea-agent container for delete / add since they need to
Expand Down Expand Up @@ -474,10 +478,14 @@ func testReconcileGatewayRoutesOnStartup(t *testing.T, data *TestData, isIPv6 bo
// We expect the agent to delete the extra route we added and add back the route we deleted
t.Logf("Waiting for gateway routes to converge")
if err := wait.Poll(defaultInterval, defaultTimeout, func() (bool, error) {
newRoutes, _, err := getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
var llRoute *Route
newRoutes, _, llRoute, err := getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
if err != nil {
return false, err
}
if isIPv6 && llRoute == nil {
return false, fmt.Errorf("IPv6 link-local route not found")
}
if len(newRoutes) != len(routes) {
return false, nil
}
Expand Down Expand Up @@ -559,7 +567,7 @@ func testCleanStaleClusterIPRoutes(t *testing.T, data *TestData, isIPv6 bool) {
}
var routes []Route
if err := wait.PollImmediate(defaultInterval, defaultTimeout, func() (bool, error) {
_, routes, err = getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
_, routes, _, err = getGatewayRoutes(t, data, antreaGWName, nodeName, isIPv6)
if err != nil {
t.Logf("Failed to get Service gateway routes: %v", err)
return false, nil
Expand Down Expand Up @@ -650,7 +658,7 @@ type Route struct {
routeGW net.IP
}

func getGatewayRoutes(t *testing.T, data *TestData, antreaGWName, nodeName string, isIPv6 bool) ([]Route, []Route, error) {
func getGatewayRoutes(t *testing.T, data *TestData, antreaGWName, nodeName string, isIPv6 bool) ([]Route, []Route, *Route, error) {
var cmd []string
virtualIP := config.VirtualServiceIPv4
mask := 32
Expand All @@ -664,26 +672,31 @@ func getGatewayRoutes(t *testing.T, data *TestData, antreaGWName, nodeName strin
podName := getAntreaPodName(t, data, nodeName)
stdout, stderr, err := data.RunCommandFromPod(antreaNamespace, podName, agentContainerName, cmd)
if err != nil {
return nil, nil, fmt.Errorf("error when running ip command in Pod '%s': %v - stdout: %s - stderr: %s", podName, err, stdout, stderr)
return nil, nil, nil, fmt.Errorf("error when running ip command in Pod '%s': %v - stdout: %s - stderr: %s", podName, err, stdout, stderr)
}

var nodeRoutes, serviceRoutes []Route
var llRoute *Route
re := regexp.MustCompile(`([^\s]+) via ([^\s]+)`)
for _, line := range strings.Split(stdout, "\n") {
var err error
matches := re.FindStringSubmatch(line)
if len(matches) == 0 {
if isIPv6 && strings.HasPrefix(line, "fe80::") {
llRoute = &Route{}
_, llRoute.routeCIDR, _ = net.ParseCIDR(strings.Split(line, " ")[0])
}
continue
}
if net.ParseIP(matches[1]) != nil {
matches[1] = fmt.Sprintf("%s/%d", matches[1], mask)
}
route := Route{}
if _, route.routeCIDR, err = net.ParseCIDR(matches[1]); err != nil {
return nil, nil, fmt.Errorf("%s is not a valid net CIDR", matches[1])
return nil, nil, nil, fmt.Errorf("%s is not a valid net CIDR", matches[1])
}
if route.routeGW = net.ParseIP(matches[2]); route.routeGW == nil {
return nil, nil, fmt.Errorf("%s is not a valid IP", matches[2])
return nil, nil, nil, fmt.Errorf("%s is not a valid IP", matches[2])
}
if route.routeGW.Equal(virtualIP) {
// If the route is added by AntreaProxy, append it to slice serviceRoutes.
Expand All @@ -693,7 +706,7 @@ func getGatewayRoutes(t *testing.T, data *TestData, antreaGWName, nodeName strin
nodeRoutes = append(nodeRoutes, route)
}
}
return nodeRoutes, serviceRoutes, nil
return nodeRoutes, serviceRoutes, llRoute, nil
}

// testDeletePreviousRoundFlowsOnStartup checks that when the Antrea agent is restarted, flows from
Expand Down

0 comments on commit 8c037f6

Please sign in to comment.