From 492b369aaba264b62363f178a9fb3b3d667a1ea8 Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Thu, 27 Jul 2023 01:53:48 -0700
Subject: [PATCH 1/3] uapi: Sync with the kernel to get socket CMD

Synchronize UAPI to be able to use IORING_OP_URING_CMD operations on
sockets. These will bring the following CMD operations:

* SOCKET_URING_OP_SIOCINQ - Returns the amount of queued unread data in
  the receive buffer.
* SOCKET_URING_OP_SIOCOUTQ - Returns the amount of unsent data in the
  socket send queue.

Signed-off-by: Breno Leitao
---
 src/include/liburing/io_uring.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 057201f2c..10a6ef080 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -715,6 +715,14 @@ struct io_uring_recvmsg_out {
 	__u32 flags;
 };
 
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+	SOCKET_URING_OP_SIOCINQ = 0,
+	SOCKET_URING_OP_SIOCOUTQ,
+};
+
 #ifdef __cplusplus
 }
 #endif
From 2459fef094113fc0e4928d9190315852bda3c03a Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Thu, 27 Jul 2023 05:26:20 -0700
Subject: [PATCH 2/3] io_uring_prep_cmd: Create a new helper for command ops

This is a new helper to prepare a sqe for a socket command
(SOCKET_URING_OP). Right now, we just have two commands[1]:

* SOCKET_URING_OP_SIOCINQ
* SOCKET_URING_OP_SIOCOUTQ

More socket commands are being added[2], so this helper is generic
enough to accommodate the upcoming commands.

This diff also creates a generic manpage for command
(io_uring_prep_cmd), but only populates the socket-specific part
(io_uring_prep_cmd_sock).
[1] https://lore.kernel.org/all/20230627134424.2784797-1-leitao@debian.org/
[2] https://lore.kernel.org/all/20230724142237.358769-1-leitao@debian.org/

Signed-off-by: Breno Leitao
---
 CHANGELOG               |  1 +
 man/io_uring_prep_cmd.3 | 91 +++++++++++++++++++++++++++++++++++++++++
 src/include/liburing.h  | 23 +++++++++++
 src/liburing-ffi.map    |  1 +
 4 files changed, 116 insertions(+)
 create mode 100644 man/io_uring_prep_cmd.3

diff --git a/CHANGELOG b/CHANGELOG
index 71ca3919e..42a7fc1f7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ liburing-2.4 release
   io_uring_prep_socket_direct() factor in being called with
   IORING_FILE_INDEX_ALLOC for allocating a direct descriptor.
 - Add io_uring_prep_sendto() function.
+- Add io_uring_prep_cmd_sock() function.
 
 liburing-2.3 release
 
diff --git a/man/io_uring_prep_cmd.3 b/man/io_uring_prep_cmd.3
new file mode 100644
index 000000000..01b48da72
--- /dev/null
+++ b/man/io_uring_prep_cmd.3
@@ -0,0 +1,91 @@
+.\" Copyright (C) 2023 Breno Leitao
+.\"
+.\" SPDX-License-Identifier: LGPL-2.0-or-later
+.\"
+.TH io_uring_prep_cmd 3 "July 27, 2023" "liburing-2.5" "liburing Manual"
+.SH NAME
+io_uring_prep_cmd_sock \- prepare a command request for a socket
+.SH SYNOPSIS
+.nf
+.B #include <liburing.h>
+.PP
+.BI "void io_uring_prep_cmd_sock(struct io_uring_sqe *" sqe ","
+.BI "                            int " cmd_op ","
+.BI "                            int " fd ","
+.BI "                            int " level ","
+.BI "                            int " optname ","
+.BI "                            void *" optval ","
+.BI "                            int " optlen ");"
+.fi
+.SH DESCRIPTION
+.PP
+The
+.BR io_uring_prep_cmd_sock (3)
+function prepares a command request for a socket. The submission queue entry
+.I sqe
+is set up to use the socket file descriptor pointed to by
+.I fd
+to start a command operation defined by
+.I cmd_op.
+
+This is a generic function, and each command has its own individual
+.I level, optname, optval
+values. The optlen defines the size pointed to by
+.I optval.
+
+.SH Available commands
+
+.TP
+.B SOCKET_URING_OP_SIOCINQ
+Returns the amount of queued unread data in the receive buffer.
+The socket must not be in LISTEN state, otherwise an error
+.B -EINVAL
+is returned in the CQE
+.I res
+field.
+The following arguments are not used for this command
+.I level, optname, optval
+and
+.I optlen.
+
+Negative return value means an error.
+
+For more information about this command, please check
+.BR unix (7).
+
+
+.TP
+.B SOCKET_URING_OP_SIOCOUTQ
+Returns the amount of unsent data in the socket send queue.
+The socket must not be in LISTEN state, otherwise an error
+.B -EINVAL
+is returned in the CQE
+.I res
+field.
+The following arguments are not used for this command
+.I level, optname, optval
+and
+.I optlen.
+
+Negative return value means an error.
+
+For more information about this command, please check
+.BR unix (7).
+
+.SH NOTES
+The memory block pointed to by
+.I optval
+needs to be valid/live until the CQE returns.
+
+.SH RETURN VALUE
+Dependent on the command.
+
+.SH ERRORS
+The CQE
+.I res
+field will contain the result of the operation.
+.SH SEE ALSO
+.BR io_uring_get_sqe (3),
+.BR io_uring_submit (3),
+.BR io_uring_register (2),
+.BR unix (7)
diff --git a/src/include/liburing.h b/src/include/liburing.h
index 21d9aec80..100854473 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -1128,6 +1128,29 @@ IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
 }
 
+
+/*
+ * Prepare a command request (IORING_OP_URING_CMD) for the socket 'fd'.
+ * 'cmd_op' is stored in sqe->cmd_op and selects the socket command.
+ * 'level', 'optname', 'optval' and 'optlen' are accepted but not used yet.
+ */
+IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe,
+					  int cmd_op,
+					  int fd,
+					  int level,
+					  int optname,
+					  void *optval,
+					  int optlen)
+{
+	/* This will be removed once the get/setsockopt() patches land */
+	(void) optlen;
+	(void) optval;
+	(void) level;
+	(void) optname;
+	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
+	sqe->cmd_op = cmd_op;
+}
+
 /*
  * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
  * the SQ ring
diff --git a/src/liburing-ffi.map b/src/liburing-ffi.map
index 1ebe2e11c..debeccde9 100644
--- a/src/liburing-ffi.map
+++ b/src/liburing-ffi.map
@@ -173,6 +173,7 @@ LIBURING_2.4 {
 		io_uring_prep_msg_ring_fd_alloc;
 		io_uring_prep_sendto;
 		io_uring_queue_init_mem;
+		io_uring_prep_cmd_sock;
 	local:
 		*;
 };
From ac2296e928415ada5f05219b05731f9d93e2011b Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Wed, 22 Mar 2023 04:25:22 -0700
Subject: [PATCH 3/3] test: socket io cmd operations

Create a test for the new socket commands. Create tests using STREAM,
DGRAM and RAW sockets.

Compare the values that come from ioctl with io_uring cmds and make
sure they match.

Signed-off-by: Breno Leitao
---
 test/Makefile        |   1 +
 test/socket-io-cmd.c | 239 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 test/socket-io-cmd.c

diff --git a/test/Makefile b/test/Makefile
index 48931380e..0e3787951 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -166,6 +166,7 @@ test_srcs := \
 	single-issuer.c \
 	skip-cqe.c \
 	socket.c \
+	socket-io-cmd.c \
 	socket-rw.c \
 	socket-rw-eagain.c \
 	socket-rw-offset.c \
diff --git a/test/socket-io-cmd.c b/test/socket-io-cmd.c
new file mode 100644
index 000000000..198c1475d
--- /dev/null
+++ b/test/socket-io-cmd.c
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Check that CMD operations on sockets are consistent.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/sockios.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define USERDATA	0x1234
+#define MSG		"foobarbaz"
+
+struct fds {
+	int tx;
+	int rx;
+};
+
+/* Create 2 sockets (tx, rx) given the socket type; -1 marks a failure */
+static struct fds create_sockets(bool stream)
+{
+	struct fds retval;
+	int fd[2] = { -1, -1 };
+
+	t_create_socket_pair(fd, stream);
+
+	retval.tx = fd[0];
+	retval.rx = fd[1];
+
+	return retval;
+}
+
+/* Queue one socket command and submit it; returns 0 or a negative error */
+static int create_sqe_and_submit(struct io_uring *ring, int32_t fd, int op)
+{
+	struct io_uring_sqe *sqe;
+	int ret;
+
+	assert(fd >= 0);
+	sqe = io_uring_get_sqe(ring);
+	assert(sqe != NULL);
+
+	io_uring_prep_cmd_sock(sqe, op, fd, 0, 0, NULL, 0);
+	sqe->user_data = USERDATA;
+
+	ret = io_uring_submit_and_wait(ring, 1);
+	if (ret < 0)
+		return ret;
+
+	/* Exactly one SQE was queued, anything else is a failure */
+	return ret == 1 ? 0 : -1;
+}
+
+/* Reap one CQE and return its result */
+static int receive_cqe(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	int err, res;
+
+	err = io_uring_wait_cqe(ring, &cqe);
+	assert(err == 0);
+	assert(cqe->user_data == USERDATA);
+
+	/* Read the result before the CQE is marked as seen */
+	res = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	return res;
+}
+
+/* Write str to the tx socket and assert that all of it was written */
+static ssize_t send_data(struct fds *s, const char *str)
+{
+	ssize_t written_bytes;
+
+	written_bytes = write(s->tx, str, strlen(str));
+	assert(written_bytes == (ssize_t) strlen(str));
+
+	return written_bytes;
+}
+
+static int run_test(bool stream)
+{
+	struct fds sockfds;
+	int bytes_in, bytes_out;
+	struct io_uring ring;
+	ssize_t written_bytes;
+	int error, ret = T_EXIT_FAIL;
+
+	/* Create two sockets */
+	sockfds = create_sockets(stream);
+	assert(sockfds.tx > 0);
+	assert(sockfds.rx > 0);
+	/* Send data using sockfds.tx */
+	written_bytes = send_data(&sockfds, MSG);
+
+	/* Simple io_uring ring creation */
+	error = t_create_ring(1, &ring, 0);
+	if (error != T_SETUP_OK) {
+		if (error == T_SETUP_SKIP)
+			ret = T_EXIT_SKIP;
+		goto out_close;
+	}
+
+	error = create_sqe_and_submit(&ring, sockfds.rx,
+				      SOCKET_URING_OP_SIOCINQ);
+	if (error)
+		goto out_ring;
+	bytes_in = receive_cqe(&ring);
+
+	error = create_sqe_and_submit(&ring, sockfds.tx,
+				      SOCKET_URING_OP_SIOCOUTQ);
+	if (error)
+		goto out_ring;
+	bytes_out = receive_cqe(&ring);
+
+	if (bytes_in == -ENOTSUP || bytes_out == -ENOTSUP) {
+		fprintf(stderr, "Skipping tests. -ENOTSUP returned\n");
+		ret = T_EXIT_SKIP;
+		goto out_ring;
+	}
+	if (bytes_in < 0 || bytes_out < 0) {
+		fprintf(stderr, "socket cmd failed: %d, %d\n",
+			bytes_in, bytes_out);
+		goto out_ring;
+	}
+
+	/*
+	 * Assert the number of written bytes are either in the socket buffer
+	 * or on the receive side
+	 */
+	if (bytes_in + bytes_out != written_bytes) {
+		fprintf(stderr, "values does not match: %d+%d != %zd\n",
+			bytes_in, bytes_out, written_bytes);
+		goto out_ring;
+	}
+
+	ret = T_EXIT_PASS;
+out_ring:
+	io_uring_queue_exit(&ring);
+out_close:
+	close(sockfds.tx);
+	close(sockfds.rx);
+	return ret;
+}
+
+/*
+ * Make sure that siocoutq and siocinq returns the same value
+ * using ioctl(2) and uring commands for raw sockets
+ */
+static int run_test_raw(void)
+{
+	int ioctl_siocoutq, ioctl_siocinq;
+	int uring_siocoutq, uring_siocinq;
+	struct io_uring ring;
+	int sock, error, ret = T_EXIT_FAIL;
+
+	sock = socket(PF_INET, SOCK_RAW, IPPROTO_TCP);
+	if (sock == -1) {
+		/* You need root to create raw socket */
+		perror("Not able to create a raw socket");
+		return T_EXIT_SKIP;
+	}
+
+	/* Simple SIOCOUTQ using ioctl */
+	error = ioctl(sock, SIOCOUTQ, &ioctl_siocoutq);
+	if (error < 0) {
+		fprintf(stderr, "Failed to run ioctl(SIOCOUTQ): %d\n", error);
+		goto out_close;
+	}
+
+	error = ioctl(sock, SIOCINQ, &ioctl_siocinq);
+	if (error < 0) {
+		fprintf(stderr, "Failed to run ioctl(SIOCINQ): %d\n", error);
+		goto out_close;
+	}
+
+	/* Get the same operation using uring cmd */
+	error = t_create_ring(1, &ring, 0);
+	if (error != T_SETUP_OK) {
+		if (error == T_SETUP_SKIP)
+			ret = T_EXIT_SKIP;
+		goto out_close;
+	}
+
+	if (create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCOUTQ))
+		goto out_ring;
+	uring_siocoutq = receive_cqe(&ring);
+
+	if (create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCINQ))
+		goto out_ring;
+	uring_siocinq = receive_cqe(&ring);
+
+	/* Compare that both values (ioctl and uring CMD) should be similar */
+	if (uring_siocoutq != ioctl_siocoutq) {
+		fprintf(stderr, "values does not match: %d != %d\n",
+			uring_siocoutq, ioctl_siocoutq);
+		goto out_ring;
+	}
+	if (uring_siocinq != ioctl_siocinq) {
+		fprintf(stderr, "values does not match: %d != %d\n",
+			uring_siocinq, ioctl_siocinq);
+		goto out_ring;
+	}
+
+	ret = T_EXIT_PASS;
+out_ring:
+	io_uring_queue_exit(&ring);
+out_close:
+	close(sock);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	if (argc > 1)
+		return 0;
+
+	/* Test SOCK_STREAM */
+	err = run_test(true);
+	if (err)
+		return err;
+
+	/* Test SOCK_DGRAM */
+	err = run_test(false);
+	if (err)
+		return err;
+
+	/* Test raw sockets */
+	return run_test_raw();
+}