Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite sockets #579

Merged
merged 1 commit into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions Documentation/pal/host-abi.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
PAL host ABI
============

TODO: This document is outdated and needs a proper review.

PAL Host ABI is the interface used by Gramine to interact with its host. It is translated into
the host's native ABI (e.g. system calls for UNIX) by a layer called the Platform Adaptation Layer
(PAL). A PAL not only exports a set of APIs (PAL APIs) that can be called by the library OS, but
Expand Down Expand Up @@ -229,6 +231,44 @@ Flags used for stream manipulation
.. doxygentypedef:: pal_wait_flags_t
:project: pal


Socket handling
^^^^^^^^^^^^^^^

.. doxygenenum:: pal_socket_domain
:project: pal

.. doxygenenum:: pal_socket_type
:project: pal

.. doxygenstruct:: pal_socket_addr
:project: pal

.. doxygenstruct:: pal_iovec
:project: pal

.. doxygenfunction:: DkSocketCreate
:project: pal

.. doxygenfunction:: DkSocketBind
:project: pal

.. doxygenfunction:: DkSocketListen
:project: pal

.. doxygenfunction:: DkSocketAccept
:project: pal

.. doxygenfunction:: DkSocketConnect
:project: pal

.. doxygenfunction:: DkSocketSend
:project: pal

.. doxygenfunction:: DkSocketRecv
:project: pal


Thread creation
^^^^^^^^^^^^^^^

Expand Down
29 changes: 27 additions & 2 deletions LibOS/shim/include/shim_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ struct shim_fs_ops {
*/
ssize_t (*write)(struct shim_handle* hdl, const void* buf, size_t count, file_off_t* pos);

/*!
* \brief Read a continuous data chunk into multiple buffers.
*
* \param handle Handle.
* \param iov Array of buffers to read to.
* \param iov_len Length of \p iov.
* \param[in,out] pos Position at which to start reading. Might be updated on success.
*
* \returns Number of bytes read on success, negative error code on failure.
*/
ssize_t (*readv)(struct shim_handle* handle, struct iovec* iov, size_t iov_len,
file_off_t* pos);

/*!
* \brief Write a continuous data chunk from multiple buffers.
*
* \param handle Handle.
* \param iov Array of buffers to write from.
* \param iov_len Length of \p iov.
* \param[in,out] pos Position at which to start writing. Might be updated on success.
*
* \returns Number of bytes written on success, negative error code on failure.
*/
ssize_t (*writev)(struct shim_handle* handle, struct iovec* iov, size_t iov_len,
file_off_t* pos);

/*
* \brief Map file at an address.
*
Expand Down Expand Up @@ -889,6 +915,7 @@ extern struct shim_d_ops str_d_ops;
extern struct shim_fs_ops tmp_fs_ops;
extern struct shim_d_ops tmp_d_ops;

/* XXX: why are these called "builtin"? */
extern struct shim_fs chroot_builtin_fs;
extern struct shim_fs chroot_encrypted_builtin_fs;
extern struct shim_fs tmp_builtin_fs;
Expand Down Expand Up @@ -932,8 +959,6 @@ int synthetic_setup_dentry(struct shim_dentry* dent, mode_t type, mode_t perm);

int fifo_setup_dentry(struct shim_dentry* dent, mode_t perm, int fd_read, int fd_write);

int unix_socket_setup_dentry(struct shim_dentry* dent, mode_t perm);

/*
* Calculate the URI for a dentry. The URI scheme is determined by file type (`type` field). It
* needs to be passed separately (instead of using `dent->inode->type`) because the dentry might not
Expand Down
106 changes: 40 additions & 66 deletions LibOS/shim/include/shim_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@

#include <asm/fcntl.h>
#include <asm/resource.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/un.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stdint.h>

#include "atomic.h" // TODO: migrate to stdatomic.h
#include "linux_socket.h"
#include "list.h"
#include "pal.h"
#include "shim_defs.h"
Expand Down Expand Up @@ -69,77 +67,53 @@ struct shim_pipe_handle {
char name[PIPE_URI_SIZE];
};

#define SOCK_STREAM 1
#define SOCK_DGRAM 2
#define SOCK_NONBLOCK 04000
#define SOCK_CLOEXEC 02000000

#define SOL_TCP 6

#define PF_LOCAL 1
#define PF_UNIX PF_LOCAL
#define PF_FILE PF_LOCAL
#define PF_INET 2
#define PF_INET6 10

#define AF_UNIX PF_UNIX
#define AF_INET PF_INET
#define AF_INET6 PF_INET6

#define SOCK_URI_SIZE 108

enum shim_sock_state {
SOCK_CREATED,
SOCK_NEW,
SOCK_BOUND,
SOCK_CONNECTED,
SOCK_BOUNDCONNECTED,
SOCK_LISTENED,
SOCK_ACCEPTED,
SOCK_SHUTDOWN,
SOCK_LISTENING,
};

/*
* Access to `state`, `remote_addr`, `remote_addrlen`, `local_addr`, `local_addrlen`, `last_error`,
* `sendtimeout_us`, `receivetimeout_us`, `can_be_read`, `can_be_written` and `was_bound` is
* protected by `lock`.
* `ops`, `domain`, `type` and `protocol` are read-only and do not need any locking.
* Access to `peek` struct is protected by `recv_lock`. This lock also ensures proper ordering of
* stream reads (see the comment in `do_recvmsg` in "LibOS/shim/src/sys/shim_socket.c").
* `pal_handle` should be accessed using atomic operations. It can be NULL. Once it's set, it cannot
* change anymore.
* If you need to take both `recv_lock` and `lock`, take the former first.
*/
struct shim_sock_handle {
struct shim_lock lock;
struct shim_sock_ops* ops;
PAL_HANDLE pal_handle;
int domain;
int sock_type;
int type;
int protocol;
int error;

enum shim_sock_state sock_state;

union shim_sock_addr {
// INET addr
struct {
struct addr_inet {
unsigned short port;
unsigned short ext_port;
union {
struct in_addr v4;
struct in6_addr v6;
} addr;
} bind, conn;
} in;
// UNIX addr
struct addr_unix {
struct shim_dentry* dentry;
char name[PIPE_URI_SIZE];
} un;
} addr;

struct shim_sock_option {
struct shim_sock_option* next;
int level;
int optname;
int optlen;
char optval[];
}* pending_options;

struct shim_peek_buffer {
size_t size; /* total size (capacity) of buffer `buf` */
size_t start; /* beginning of buffered but yet unread data in `buf` */
size_t end; /* end of buffered but yet unread data in `buf` */
char uri[SOCK_URI_SIZE]; /* cached URI for recvfrom(udp_socket) case */
char buf[]; /* peek buffer of size `size` */
}* peek_buffer;
enum shim_sock_state state;
struct sockaddr_storage remote_addr;
size_t remote_addrlen;
struct sockaddr_storage local_addr;
size_t local_addrlen;
struct {
char* buf;
size_t buf_size;
size_t data_size;
} peek;
struct shim_lock recv_lock;
unsigned int last_error;
uint64_t sendtimeout_us;
uint64_t receivetimeout_us;
/* This field denotes whether the socket was ever bound. */
bool was_bound;
/* This field indicates if the socket is ready for read-like operations (`recv`/`read` or
* `accept`, depending on the socket type and state). */
bool can_be_read;
/* Same as above but for `send`/`write`. */
bool can_be_written;
bool reuseaddr;
};

struct shim_dir_handle {
Expand Down
7 changes: 7 additions & 0 deletions LibOS/shim/include/shim_signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ int do_kill_pgroup(IDTYPE sender, IDTYPE pgid, int sig);

void fill_siginfo_code_and_status(siginfo_t* info, int signal, int exit_code);

/*!
* \brief Checks whether `ret` indicates that the syscall was interrupted.
*
* \param ret Return value from some syscall.
*
* \returns `true` if \p ret indicates that the syscall was interrupted, `false` otherwise.
*
* NOTE(review): the exact set of return values treated as "interrupted" is defined by the
* implementation, which is not visible from this header - confirm before relying on it.
*/
bool is_eintr_like(int ret);

int do_nanosleep(uint64_t timeout_us, struct __kernel_timespec* rem);

#endif /* _SHIM_SIGNAL_H_ */
115 changes: 115 additions & 0 deletions LibOS/shim/include/shim_socket.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2022 Intel Corporation
* Borys Popławski <borysp@invisiblethingslab.com>
*/

#include "linux_socket.h"
#include "shim_handle.h"

#define SHIM_SOCK_MAX_PENDING_CONNS 4096

/*!
* \brief Table of callbacks implementing socket operations.
*
* `sock_unix_ops` and `sock_ip_ops`, declared in this header, are instances of this table.
*
* NOTE(review): the meaning of the `int` return values is not stated in this header; presumably
* 0 on success and a negative error code on failure - confirm against the implementations.
*/
struct shim_sock_ops {
/*!
* \brief Verify the socket handle and initialize type specific fields.
*
* \param handle Handle of the socket being created.
*
* This callback assumes that \p handle is already correctly initialized.
*/
int (*create)(struct shim_handle* handle);

/*!
* \brief Bind the handle to an address.
*
* \param handle A handle.
* \param addr An address to bind to.
* \param addrlen The length of \p addr.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*bind)(struct shim_handle* handle, void* addr, size_t addrlen);

/*!
* \brief Set the handle into listening mode.
*
* \param handle A handle.
* \param backlog NOTE(review): presumably the requested length of the pending connections
* queue (see `SHIM_SOCK_MAX_PENDING_CONNS`) - confirm.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*listen)(struct shim_handle* handle, unsigned int backlog);

/*!
* \brief Accept a connection on a listening handle.
*
* \param handle A handle in listening mode.
* \param is_nonblocking `true` if the new handle is to be set in nonblocking mode.
* \param[out] out_client On success contains the new handle.
*
* This callback is called without any locks and must support concurrent calls.
*/
int (*accept)(struct shim_handle* handle, bool is_nonblocking, struct shim_handle** out_client);

/*!
* \brief Connect the handle to a remote address.
*
* \param handle A handle.
* \param addr A remote address to connect to.
* \param addrlen The length of \p addr.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*connect)(struct shim_handle* handle, void* addr, size_t addrlen);

/*!
* \brief Disconnect a previously connected handle.
*
* \param handle A previously connected handle.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*disconnect)(struct shim_handle* handle);

/*!
* \brief Get a socket option.
*
* \param handle A handle.
* \param level Level of the option.
* \param optname Name of the option.
* \param optval Buffer for the option value.
* \param len NOTE(review): presumably the size of \p optval on input and the size of the
* returned value on output - confirm.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*getsockopt)(struct shim_handle* handle, int level, int optname, void* optval,
size_t* len);

/*!
* \brief Set a socket option.
*
* \param handle A handle.
* \param level Level of the option.
* \param optname Name of the option.
* \param optval Buffer with the option value.
* \param len NOTE(review): presumably the size of \p optval - confirm.
*
* Must be called with `handle->info.sock.lock` taken.
*/
int (*setsockopt)(struct shim_handle* handle, int level, int optname, void* optval,
size_t len);

/*!
* \brief Send an array of buffers as continuous data.
*
* \param handle A handle.
* \param iov An array of buffers to write from.
* \param iov_len The length of \p iov.
* \param[out] out_size On success contains the number of bytes sent.
* \param addr An address to send to. May be NULL. It's up to the implementation to
* decide what to do with it (which might mean completely ignoring it).
* \param addrlen The length of \p addr.
*/
int (*send)(struct shim_handle* handle, struct iovec* iov, size_t iov_len, size_t* out_size,
void* addr, size_t addrlen);

/*!
* \brief Receive continuous data into an array of buffers.
*
* \param handle A handle.
* \param iov An array of buffers to read to.
* \param iov_len The length of \p iov.
* \param[out] out_total_size On success contains the number of bytes received (STREAM)
* or the datagram size (DGRAM), which might be bigger than
* the total size of buffers in \p iov array.
* \param[out] addr On success contains the address data was received from. May
* be NULL.
* \param[in,out] addrlen The length of \p addr. On success updated to the actual
* length of the address. Bigger than original value indicates
* that truncation has happened.
* \param force_nonblocking If `true` this request should not block. Otherwise just use
* whatever mode the handle is in.
*/
int (*recv)(struct shim_handle* handle, struct iovec* iov, size_t iov_len,
size_t* out_total_size, void* addr, size_t* addrlen, bool force_nonblocking);
};

extern struct shim_sock_ops sock_unix_ops;
extern struct shim_sock_ops sock_ip_ops;

ssize_t do_recvmsg(struct shim_handle* handle, struct iovec* iov, size_t iov_len, void* addr,
size_t* addrlen, unsigned int* flags);
ssize_t do_sendmsg(struct shim_handle* handle, struct iovec* iov, size_t iov_len, void* addr,
size_t addrlen, unsigned int flags);
Loading