Skip to content

Commit

Permalink
UBUNTU: SAUCE: fan: add VXLAN implementation
Browse files Browse the repository at this point in the history
BugLink: https://bugs.launchpad.net/bugs/2064508

Generify the fan mapping support and utilise that to implement fan
mappings over vxlan transport.

Expose the existence of this functionality (when the module is loaded)
via an additional sysctl marker.

Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
[apw@canonical.com: added feature marker for fan over vxlan.]
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>

[ arighi: adjust conflicts in vxlan_xmit() and vxlan_xmit_one() for 6.4-rc1 ]
[ arighi: support v6.8 ABI ]
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>

[tjaalton: support v6.11 ABI ]
[tjaalton: Remove the now superfluous sentinel element from ctl_table array ]
Signed-off-by: Timo Aaltonen <timo.aaltonen@canonical.com>
  • Loading branch information
jay-vosburgh authored and tjaalton committed Oct 14, 2024
1 parent 9a8f55e commit 2bc3ab5
Show file tree
Hide file tree
Showing 7 changed files with 452 additions and 66 deletions.
244 changes: 244 additions & 0 deletions drivers/net/vxlan/vxlan_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/ethtool.h>
#include <net/arp.h>
Expand Down Expand Up @@ -71,6 +72,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
ip_tunnel_collect_metadata();
}

/*
 * Find the fan mapping whose overlay network contains @daddr.
 *
 * @daddr is masked with each entry's overlay prefix and compared against the
 * entry's overlay address (both in network byte order).  The first matching
 * entry is returned, or NULL when no entry matches.
 *
 * NOTE(review): the RCU read-side section is dropped before returning, so the
 * returned pointer is only safe if the caller has its own protection against
 * concurrent removal -- confirm against callers.
 */
static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr)
{
	struct ip_fan_map *match = NULL;
	struct ip_fan_map *entry;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &vxlan->fan.fan_maps, list) {
		__be32 net = daddr & inet_make_mask(entry->overlay_prefix);

		if (entry->overlay != net)
			continue;

		match = entry;
		break;
	}
	rcu_read_unlock();

	return match;
}

/*
 * Remove every fan mapping from @vxlan.
 *
 * Each entry is unlinked with list_del_rcu() and released through
 * kfree_rcu(), so concurrent RCU readers may keep using an entry until a
 * grace period has passed.
 *
 * This is an update-side walk that deletes as it goes, so it must not use the
 * reader iterator: once an entry has been handed to kfree_rcu() outside of a
 * read-side critical section, fetching its ->next is not guaranteed to be
 * safe.  list_for_each_entry_safe() caches the successor before the current
 * entry is unlinked.
 *
 * NOTE(review): presumably serialised against other writers by the caller
 * (netlink configuration path) -- confirm.
 */
static void vxlan_fan_flush_map(struct vxlan_dev *vxlan)
{
	struct ip_fan_map *fan_map, *next;

	list_for_each_entry_safe(fan_map, next, &vxlan->fan.fan_maps, list) {
		list_del_rcu(&fan_map->list);
		kfree_rcu(fan_map, rcu);
	}
}

/*
 * Delete the fan mapping that covers @overlay.
 *
 * The entry is located with vxlan_fan_find_map(), unlinked from the RCU list
 * and freed after a grace period.
 *
 * Returns 0 on success or -ENOENT when no mapping matches @overlay.
 */
static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay)
{
	struct ip_fan_map *victim = vxlan_fan_find_map(vxlan, overlay);

	if (victim) {
		list_del_rcu(&victim->list);
		kfree_rcu(victim, rcu);
		return 0;
	}

	return -ENOENT;
}

static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map)
{
__be32 overlay_mask, underlay_mask;
struct ip_fan_map *fan_map;

overlay_mask = inet_make_mask(map->overlay_prefix);
underlay_mask = inet_make_mask(map->underlay_prefix);

netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n",
map->overlay, map->overlay_prefix,
map->underlay, map->underlay_prefix,
overlay_mask, underlay_mask);

if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
return -EINVAL;

if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
return -EINVAL;

/* Special case: overlay 0 and underlay 0: flush all mappings */
if (!map->overlay && !map->underlay) {
vxlan_fan_flush_map(vxlan);
return 0;
}

/* Special case: overlay set and underlay 0: clear map for overlay */
if (!map->underlay)
return vxlan_fan_del_map(vxlan, map->overlay);

if (vxlan_fan_find_map(vxlan, map->overlay))
return -EEXIST;

fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
fan_map->underlay = map->underlay;
fan_map->overlay = map->overlay;
fan_map->underlay_prefix = map->underlay_prefix;
fan_map->overlay_mask = ntohl(overlay_mask);
fan_map->overlay_prefix = map->overlay_prefix;

list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps);

return 0;
}

static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan)
{
struct ifla_fan_map *map;
struct nlattr *attr;
int rem, rv;

nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
map = nla_data(attr);
rv = vxlan_fan_add_map(vxlan, map);
if (rv)
return rv;
}

return 0;
}

/*
 * Derive the underlay (outer) IPv4 destination for @skb from the fan
 * mappings of @vxlan and store it in @fan_rdst.
 *
 * The inner destination is taken from the IPv4 header for ETH_P_IP frames,
 * or from the address located (2 * dev->addr_len + 4) bytes past the ARP
 * header for ETH_P_ARP frames (the offset the debug message labels "tip").
 * The bits of that address outside the overlay mask are shifted right by
 * (underlay_prefix - overlay_prefix) and OR-ed into the mapping's underlay
 * address.
 *
 * On success @fan_rdst is zeroed and then filled with an AF_INET remote
 * address and the device's default remote VNI.
 *
 * Returns 0 on success, -EINVAL for any other ethertype, a missing header,
 * no matching mapping, or a mapping whose underlay is zero.
 */
static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb,
				struct vxlan_rdst *fan_rdst)
{
	struct ethhdr *ehdr = eth_hdr(skb);
	struct ip_fan_map *mapping;
	union vxlan_addr *remote;
	u32 daddr, underlay;
	int shift;

	if (ehdr->h_proto == htons(ETH_P_IP)) {
		struct iphdr *ip4 = ip_hdr(skb);

		if (!ip4)
			return -EINVAL;
		daddr = ip4->daddr;
	} else if (ehdr->h_proto == htons(ETH_P_ARP)) {
		struct arphdr *arph = arp_hdr(skb);
		void *body;

		if (!arph)
			return -EINVAL;

		/* Variable-length ARP payload starts right after the header. */
		body = arph + 1;
		netdev_dbg(vxlan->dev,
			   "vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n",
			   body, body + skb->dev->addr_len,
			   body + skb->dev->addr_len + 4,
			   body + (skb->dev->addr_len * 2) + 4);

		/* Skip sha, sip and tha to reach the target IP. */
		body += (skb->dev->addr_len * 2) + 4;
		memcpy(&daddr, body, 4);
	} else {
		netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", ehdr->h_proto);
		return -EINVAL;
	}

	mapping = vxlan_fan_find_map(vxlan, daddr);
	if (!mapping)
		return -EINVAL;

	/* From here on the arithmetic is done in host byte order. */
	daddr = ntohl(daddr);
	underlay = ntohl(mapping->underlay);
	if (!underlay)
		return -EINVAL;

	shift = 32 - mapping->overlay_prefix - (32 - mapping->underlay_prefix);

	memset(fan_rdst, 0, sizeof(*fan_rdst));
	fan_rdst->remote_vni = vxlan->default_dst.remote_vni;

	remote = &fan_rdst->remote_ip;
	remote->sa.sa_family = AF_INET;
	remote->sin.sin_addr.s_addr =
		htonl(underlay | ((daddr & ~mapping->overlay_mask) >> shift));

	netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n",
		   daddr, underlay, remote->sin.sin_addr.s_addr);

	return 0;
}

/* Find VXLAN socket based on network namespace, address family, UDP port,
* enabled unshareable flags and socket device binding (see l3mdev with
* non-default VRF).
Expand Down Expand Up @@ -2466,6 +2628,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}

if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) {
netdev_dbg(dev, "discard fan to localhost %pI4\n",
&rdst->remote_ip.sin.sin_addr.s_addr);
ip_rt_put(rt);
goto tx_free;
}

if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
Expand Down Expand Up @@ -2603,6 +2772,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_release(ndst);
DEV_STATS_INC(dev, tx_errors);
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0);
tx_free:
kfree_skb(skb);
}

Expand Down Expand Up @@ -2750,6 +2920,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}

if (fan_has_map(&vxlan->fan)) {
struct vxlan_rdst fan_rdst;

netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n",
eth->h_proto, eth->h_dest);
if (vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) {
dev->stats.tx_dropped++;
kfree_skb(skb);
return NETDEV_TX_OK;
}
vxlan_xmit_one(skb, dev, vni, &fan_rdst, 0);
return NETDEV_TX_OK;
}

eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
Expand Down Expand Up @@ -3349,6 +3533,8 @@ static void vxlan_setup(struct net_device *dev)
spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}

INIT_LIST_HEAD(&vxlan->fan.fan_maps);
}

static void vxlan_ether_setup(struct net_device *dev)
Expand Down Expand Up @@ -4079,6 +4265,12 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->remote_ip.sa.sa_family = AF_INET6;
}

if (data[IFLA_VXLAN_FAN_MAP]) {
err = vxlan_parse_fan_map(data, vxlan);
if (err)
return err;
}

if (data[IFLA_VXLAN_LOCAL]) {
if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
Expand Down Expand Up @@ -4464,6 +4656,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(0) + /* IFLA_VXLAN_GPE */
nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */
nla_total_size(sizeof(struct ip_fan_map) * 256) +
0;
}

Expand Down Expand Up @@ -4510,6 +4703,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
}
}

if (fan_has_map(&vxlan->fan)) {
struct nlattr *fan_nest;
struct ip_fan_map *fan_map;

fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP);
if (!fan_nest)
goto nla_put_failure;
list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
struct ifla_fan_map map;

map.underlay = fan_map->underlay;
map.underlay_prefix = fan_map->underlay_prefix;
map.overlay = fan_map->overlay;
map.overlay_prefix = fan_map->overlay_prefix;
if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
goto nla_put_failure;
}
nla_nest_end(skb, fan_nest);
}

if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
Expand Down Expand Up @@ -4850,6 +5063,21 @@ static __net_init int vxlan_init_net(struct net *net)
NULL);
}

#ifdef CONFIG_SYSCTL
/* Feature marker value exposed read-only through the sysctl entry below. */
static unsigned int vxlan_fan_version = 4;

/* Handle returned by register_net_sysctl(); needed to unregister on exit. */
static struct ctl_table_header *vxlan_fan_header;

/*
 * Single read-only (0444) entry named "vxlan"; it is registered under the
 * "net/fan" directory at module init.
 */
static struct ctl_table vxlan_fan_sysctls[] = {
	{
		.procname	= "vxlan",
		.mode		= 0444,
		.maxlen		= sizeof(vxlan_fan_version),
		.data		= &vxlan_fan_version,
		.proc_handler	= proc_dointvec,
	},
};
#endif /* CONFIG_SYSCTL */

static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn,
struct list_head *dev_to_kill)
{
Expand Down Expand Up @@ -4915,7 +5143,20 @@ static int __init vxlan_init_module(void)

vxlan_vnifilter_init();

#ifdef CONFIG_SYSCTL
vxlan_fan_header = register_net_sysctl(&init_net, "net/fan",
vxlan_fan_sysctls);
if (!vxlan_fan_header) {
rc = -ENOMEM;
goto sysctl_failed;
}
#endif /* CONFIG_SYSCTL */

return 0;
#ifdef CONFIG_SYSCTL
sysctl_failed:
rtnl_link_unregister(&vxlan_link_ops);
#endif /* CONFIG_SYSCTL */
out4:
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
out3:
Expand All @@ -4929,6 +5170,9 @@ late_initcall(vxlan_init_module);

static void __exit vxlan_cleanup_module(void)
{
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(vxlan_fan_header);
#endif /* CONFIG_SYSCTL */
vxlan_vnifilter_uninit();
rtnl_link_unregister(&vxlan_link_ops);
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
Expand Down
18 changes: 16 additions & 2 deletions include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,18 @@ struct metadata_dst;
*/
#define FAN_OVERLAY_CNT 256

/*
 * One fan overlay -> underlay mapping, linked into ip_tunnel_fan.fan_maps.
 * Entries are added/removed with the RCU list primitives and released with
 * kfree_rcu() through the embedded rcu head.
 */
struct ip_fan_map {
__be32 underlay; /* underlay network address, network byte order */
__be32 overlay; /* overlay network address, network byte order */
u16 underlay_prefix; /* underlay prefix length in bits */
u16 overlay_prefix; /* overlay prefix length in bits */
u32 overlay_mask; /* overlay netmask, stored in HOST byte order */
struct list_head list; /* linkage in ip_tunnel_fan.fan_maps */
struct rcu_head rcu; /* deferred free via kfree_rcu() */
};

/* Per-tunnel fan state. */
struct ip_tunnel_fan {
/* u32 __rcu *map;*/
u32 map[FAN_OVERLAY_CNT];
struct list_head fan_maps; /* list of struct ip_fan_map, linked via ->list */
};

/* Kernel-side variant of ip_tunnel_parm */
Expand Down Expand Up @@ -199,6 +208,11 @@ struct ip_tunnel {
bool ignore_df;
};

/* Returns 1 when @fan has at least one mapping configured, 0 otherwise. */
static inline int fan_has_map(const struct ip_tunnel_fan *fan)
{
	if (list_empty(&fan->fan_maps))
		return 0;

	return 1;
}

struct tnl_ptk_info {
IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 proto;
Expand Down
2 changes: 2 additions & 0 deletions include/net/vxlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ struct vxlan_dev {
struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */

struct ip_tunnel_fan fan;

struct timer_list age_timer;
spinlock_t hash_lock[FDB_HASH_SIZE];
unsigned int addrcnt;
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/if_link.h
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,7 @@ enum {
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
IFLA_VXLAN_FAN_MAP = 33,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
Expand Down
4 changes: 1 addition & 3 deletions include/uapi/linux/if_tunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,6 @@ enum {
__IP_TUNNEL_FLAG_NUM,
};

#define TUNNEL_FAN __cpu_to_be16(0x8000)

enum {
IFLA_FAN_UNSPEC,
IFLA_FAN_MAPPING,
Expand All @@ -232,7 +230,7 @@ enum {

#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)

struct ip_tunnel_fan_map {
struct ifla_fan_map {
__be32 underlay;
__be32 overlay;
__u16 underlay_prefix;
Expand Down
Loading

0 comments on commit 2bc3ab5

Please sign in to comment.