From 57c77ce47063bd298e31ffcf7875ed0c2633bf8d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:11 +0100 Subject: [PATCH 1/4] mlxsw: spectrum_router: Offload decap only for up tunnels When a new local route is added, an IPIP entry is looked up to determine whether the route should be offloaded as a tunnel decap or as a trap. That decision should take into account whether the tunnel netdevice in question is actually IFF_UP, and only install a decap offload if it is. Fixes: 0063587d3587 ("mlxsw: spectrum: Support decap-only IP-in-IP tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 632c7b229054cd..c529e31fc78b75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3996,7 +3996,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, case RTN_LOCAL: ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4, dip); - if (ipip_entry) { + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, From cab43d9c877456d2d8feb77335327316d3de9871 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:12 +0100 Subject: [PATCH 2/4] mlxsw: spectrum_router: Demote tunnels on VRF migration The mlxsw driver currently doesn't offload GRE tunnels if they have the same local address and use the same underlay VRF. When such a situation arises, the tunnels in conflict are demoted to slow path. However, the current code only verifies this condition on tunnel creation and tunnel change, not when a tunnel is moved to a different VRF. When the tunnel has no bound device, underlay and overlay are the same. Thus moving a tunnel moves the underlay as well, and that can cause local address conflict. So modify mlxsw_sp_netdevice_ipip_ol_vrf_event() to check if there are any conflicting tunnels, and demote them if yes. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c529e31fc78b75..a23e452f7261b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1449,9 +1449,27 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; if (!ipip_entry) return 0; + + /* For flat configuration cases, moving overlay to a different VRF might + * cause local address conflict, and the conflicting tunnels need to be + * demoted. + */ + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, false, false, extack); } From d97cda5f465bacc82659263a885703d73759ea04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:13 +0100 Subject: [PATCH 3/4] mlxsw: spectrum_router: Handle encap to demoted tunnels Some tunnels that are offloadable on their own can nonetheless be demoted to slow path if their local address is in conflict with that of another tunnel. When a route is formed for such a tunnel, mlxsw_sp_nexthop_ipip_init() fails to find the corresponding IPIP entry, and that triggers a FIB abort. Resolve the problem by not assuming that a tunnel for which mlxsw_sp_ipip_ops.can_offload() holds also automatically has an IPIP entry. Fixes: af641713e97d ("mlxsw: spectrum_router: Onload conflicting tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a23e452f7261b9..17e2153a8dc9c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3361,22 +3361,19 @@ static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) return ul_dev ? (ul_dev->flags & IFF_UP) : true; } -static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh, - struct net_device *ol_dev) +static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_ipip_entry *ipip_entry) { bool removing; if (!nh->nh_grp->gateway || nh->ipip_entry) - return 0; - - nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - if (!nh->ipip_entry) - return -ENOENT; + return; - removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev); + nh->ipip_entry = ipip_entry; + removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - return 0; + mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -3421,21 +3418,21 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct fib_nh *fib_nh) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; struct net_device *dev = fib_nh->nh_dev; - enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -4712,21 +4709,21 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct rt6_info *rt) { - struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; struct net_device *dev = rt->dst.dev; - enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; - if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && - router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); - if (err) - return err; - mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); - return 0; + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); + if (ipip_entry) { + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + if (ipip_ops->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); + return 0; + } } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; From 09dbf6297fa8bb4433d9293623b464750d874685 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Nov 2017 13:17:14 +0100 Subject: [PATCH 4/4] mlxsw: spectrum_router: Update nexthop RIF on update The function mlxsw_sp_nexthop_rif_update() walks the list of nexthops associated with a RIF, and updates the corresponding entries in the switch. It is used in particular when a tunnel underlay netdevice moves to a different VRF, and all the nexthops are migrated over to a new RIF. The problem is that each nexthop holds a reference to its RIF, and that is not updated. So after the old RIF is gone, further activity on these nexthops (such as downing the underlay netdevice) dereferences a dangling pointer. Fix the issue by updating rif of impacted nexthops before calling mlxsw_sp_nexthop_rif_update(). Fixes: 0c5f1cd5ba8c ("mlxsw: spectrum_router: Generalize __mlxsw_sp_ipip_entry_update_tunnel()") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 17e2153a8dc9c6..72ef4f8025f00f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1370,8 +1370,9 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif); static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1389,17 +1390,18 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) { - list_splice_init(&old_lb_rif->common.nexthop_list, - &new_lb_rif->common.nexthop_list); - mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common); - } + if (keep_encap) + mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common); mlxsw_sp_rif_destroy(&old_lb_rif->common); return 0; } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + /** * Update the offload related to an IPIP entry. This always updates decap, and * in addition to that it also: @@ -3560,6 +3562,18 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif) +{ + struct mlxsw_sp_nexthop *nh; + + list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); + list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) + nh->rif = new_rif; + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) {