Skip to content

Commit

Permalink
vsock: support sockmap
Browse files Browse the repository at this point in the history
This patch adds sockmap support for vsock sockets. It is intended to be
usable by all transports, but only the virtio and loopback transports
are implemented.

SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET are all supported.

Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Bobby Eshleman authored and davem330 committed Mar 29, 2023
1 parent 24265c2 commit 634f1a7
Show file tree
Hide file tree
Showing 9 changed files with 281 additions and 6 deletions.
1 change: 1 addition & 0 deletions drivers/vhost/vsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ static struct virtio_transport vhost_transport = {
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,

.read_skb = virtio_transport_read_skb,
},

.send_pkt = vhost_transport_send_pkt,
Expand Down
1 change: 1 addition & 0 deletions include/linux/virtio_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,5 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
void virtio_transport_deliver_tap_pkt(struct sk_buff *skb);
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list);
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor);
#endif /* _LINUX_VIRTIO_VSOCK_H */
17 changes: 17 additions & 0 deletions include/net/af_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ struct vsock_sock {
void *trans;
};

s64 vsock_connectible_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
Expand Down Expand Up @@ -173,6 +174,9 @@ struct vsock_transport {

/* Addressing. */
u32 (*get_local_cid)(void);

/* Read a single skb */
int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
};

/**** CORE ****/
Expand Down Expand Up @@ -225,5 +229,18 @@ int vsock_init_tap(void);
int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);

#ifdef CONFIG_BPF_SYSCALL
extern struct proto vsock_proto;
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init vsock_bpf_build_proto(void);
#else
static inline void __init vsock_bpf_build_proto(void)
{}
#endif

#endif /* __AF_VSOCK_H__ */
1 change: 1 addition & 0 deletions net/vmw_vsock/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
obj-$(CONFIG_VSOCKETS_LOOPBACK) += vsock_loopback.o

vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
vsock-$(CONFIG_BPF_SYSCALL) += vsock_bpf.o

vsock_diag-y += diag.o

Expand Down
64 changes: 58 additions & 6 deletions net/vmw_vsock/af_vsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ static void vsock_sk_destruct(struct sock *sk);
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);

/* Protocol family. */
static struct proto vsock_proto = {
struct proto vsock_proto = {
.name = "AF_VSOCK",
.owner = THIS_MODULE,
.obj_size = sizeof(struct vsock_sock),
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = vsock_bpf_update_proto,
#endif
};

/* The default peer timeout indicates how long we will wait for a peer response
Expand Down Expand Up @@ -865,7 +868,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);

static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);

Expand All @@ -874,6 +877,7 @@ static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
else
return vsock_stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_connectible_has_data);

s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
Expand Down Expand Up @@ -1131,6 +1135,13 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
return mask;
}

/* Read a single skb from @sk by delegating to the socket's transport.
 *
 * @sk:         vsock socket to read from.
 * @read_actor: callback handed to the transport's read_skb() hook.
 *
 * Returns the value of the transport's read_skb() callback, -ENODEV when no
 * transport is assigned to the socket, or -EOPNOTSUPP when the assigned
 * transport does not provide a read_skb() callback.
 */
static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	const struct vsock_transport *transport = vsk->transport;

	/* A socket may have no transport assigned; bail out instead of
	 * dereferencing a NULL pointer.
	 */
	if (!transport)
		return -ENODEV;

	/* read_skb is an optional transport callback: not every transport
	 * implements it.
	 */
	if (!transport->read_skb)
		return -EOPNOTSUPP;

	return transport->read_skb(vsk, read_actor);
}

static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
Expand Down Expand Up @@ -1242,18 +1253,42 @@ static int vsock_dgram_connect(struct socket *sock,
memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
sock->state = SS_CONNECTED;

/* sock map disallows redirection of non-TCP sockets with sk_state !=
* TCP_ESTABLISHED (see sock_map_redirect_allowed()), so we set
* TCP_ESTABLISHED here to allow redirection of connected vsock dgrams.
*
* This doesn't seem to be an abnormal state for datagram sockets, as the
* same approach can be seen in other datagram socket types as well
* (such as unix sockets).
*/
sk->sk_state = TCP_ESTABLISHED;

out:
release_sock(sk);
return err;
}

/* Receive a datagram on a vsock datagram socket.
 *
 * With CONFIG_BPF_SYSCALL enabled, the socket's current sk_prot is checked
 * first: if it is not vsock_proto, the call is forwarded to that proto's
 * recvmsg(). Otherwise the transport's dgram_dequeue() handles the receive
 * and its return value is returned.
 *
 * NOTE(review): this span is rendered from a diff view; the `static`
 * signature and the first `vsk` declaration look like the removed pre-patch
 * lines shown next to their replacements - confirm against the real file.
 */
static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
struct vsock_sock *vsk = vsock_sk(sock->sk);
#ifdef CONFIG_BPF_SYSCALL
const struct proto *prot;
#endif
struct vsock_sock *vsk;
struct sock *sk;

sk = sock->sk;
vsk = vsock_sk(sk);

#ifdef CONFIG_BPF_SYSCALL
/* Read sk_prot once; a value other than vsock_proto means the proto has
 * been swapped (presumably via vsock_bpf_update_proto() for sockmap -
 * TODO confirm), so that proto's recvmsg() takes over.
 */
prot = READ_ONCE(sk->sk_prot);
if (prot != &vsock_proto)
return prot->recvmsg(sk, msg, len, flags, NULL);
#endif

return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);

static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
Expand All @@ -1272,6 +1307,7 @@ static const struct proto_ops vsock_dgram_ops = {
.recvmsg = vsock_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.read_skb = vsock_read_skb,
};

static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
Expand Down Expand Up @@ -2086,13 +2122,16 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
return err;
}

static int
int
vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
struct sock *sk;
struct vsock_sock *vsk;
const struct vsock_transport *transport;
#ifdef CONFIG_BPF_SYSCALL
const struct proto *prot;
#endif
int err;

sk = sock->sk;
Expand Down Expand Up @@ -2139,6 +2178,14 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
goto out;
}

#ifdef CONFIG_BPF_SYSCALL
prot = READ_ONCE(sk->sk_prot);
if (prot != &vsock_proto) {
release_sock(sk);
return prot->recvmsg(sk, msg, len, flags, NULL);
}
#endif

if (sk->sk_type == SOCK_STREAM)
err = __vsock_stream_recvmsg(sk, msg, len, flags);
else
Expand All @@ -2148,6 +2195,7 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
release_sock(sk);
return err;
}
EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);

static int vsock_set_rcvlowat(struct sock *sk, int val)
{
Expand Down Expand Up @@ -2188,6 +2236,7 @@ static const struct proto_ops vsock_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_rcvlowat = vsock_set_rcvlowat,
.read_skb = vsock_read_skb,
};

static const struct proto_ops vsock_seqpacket_ops = {
Expand All @@ -2209,6 +2258,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.read_skb = vsock_read_skb,
};

static int vsock_create(struct net *net, struct socket *sock,
Expand Down Expand Up @@ -2348,6 +2398,8 @@ static int __init vsock_init(void)
goto err_unregister_proto;
}

vsock_bpf_build_proto();

return 0;

err_unregister_proto:
Expand Down
2 changes: 2 additions & 0 deletions net/vmw_vsock/virtio_transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ static struct virtio_transport virtio_transport = {
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,

.read_skb = virtio_transport_read_skb,
},

.send_pkt = virtio_transport_send_pkt,
Expand Down
25 changes: 25 additions & 0 deletions net/vmw_vsock/virtio_transport_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,31 @@ int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

/* Dequeue one skb from the virtio vsock rx queue and feed it to @recv_actor.
 *
 * @vsk:        vsock socket whose transport-private rx queue is read.
 * @recv_actor: callback invoked with the socket and the dequeued skb.
 *
 * Returns the value produced by @recv_actor, or the error reported by
 * __skb_recv_datagram() when no skb could be dequeued.
 */
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
	struct sock *sk = sk_vsock(vsk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct sk_buff *skb;
	int offset = 0;
	int ret;

	/* __skb_recv_datagram() gives race-free handling of the receive and
	 * is not limited to dgram sockets. The dequeue is non-blocking and
	 * done under rx_lock.
	 */
	spin_lock_bh(&vvs->rx_lock);
	skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &offset, &ret);
	spin_unlock_bh(&vvs->rx_lock);

	if (skb) {
		/* The actor runs outside the lock; the skb is released here
		 * once the actor has returned.
		 */
		ret = recv_actor(sk, skb);
		kfree_skb(skb);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");
Loading

0 comments on commit 634f1a7

Please sign in to comment.