Skip to content

Commit

Permalink
set IP Don't Fragment flag (#225)
Browse files Browse the repository at this point in the history
* Sets IP_DONTFRAG (macOS) and IP_PMTUDISC_PROBE (Linux and Windows). Adds a udx-level packets-dropped-by-kernel stat (Linux).

* set feature flag for IPV6 options on Apple

* On Windows there are no separate IPV6_PMTUDISC_* constants; the values IP_PMTUDISC_NOT_SET, IP_PMTUDISC_DO, IP_PMTUDISC_DONT and IP_PMTUDISC_PROBE are used for both IPv4 and IPv6.
  • Loading branch information
jthomas43 authored Dec 3, 2024
1 parent a9f5af9 commit b737b2d
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 4 deletions.
2 changes: 2 additions & 0 deletions include/udx.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ struct udx_s {

uint64_t packets_rx;
uint64_t packets_tx;

int64_t packets_dropped_by_kernel;
};

struct udx_queue_node_s {
Expand Down
3 changes: 3 additions & 0 deletions src/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad
int
udx__udp_set_rxq_ovfl (uv_os_sock_t fd);

// Configure the UDP socket `fd` so that outgoing datagrams carry the IP
// "Don't Fragment" behaviour. The concrete socket option is chosen by each
// platform implementation.
//
// fd       - OS-level socket handle to configure.
// is_ipv6  - true to apply the IPPROTO_IPV6-level option, false for the
//            IPPROTO_IP-level option.
//
// Returns the value of the underlying setsockopt() call unchanged; callers
// treat any non-zero value as failure.
int
udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6);

#endif // UDX_IO_H
36 changes: 33 additions & 3 deletions src/io_posix.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#define _GNU_SOURCE

#if defined(__linux__) || defined(__FreeBSD__)
#define UDX_PLATFORM_HAS_SENDMMSG
#if defined(__APPLE__)
#define __APPLE_USE_RFC_3542
#endif

#include <assert.h>
Expand All @@ -23,6 +23,19 @@ udx__get_link_mtu (const struct sockaddr *addr) {
return -1;
}

// Variant for platforms that expose IP_DONTFRAG / IPV6_DONTFRAG.
//
// Turns the "don't fragment" socket option on for `fd`, picking the IPv6 or
// IPv4 protocol level and option name according to `is_ipv6`.
//
// Returns whatever setsockopt() returns, unmodified.
int
udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) {
  int enable = 1;
  int level = is_ipv6 ? IPPROTO_IPV6 : IPPROTO_IP;
  int option = is_ipv6 ? IPV6_DONTFRAG : IP_DONTFRAG;

  return setsockopt(fd, level, option, &enable, sizeof(enable));
}

#else

int
Expand Down Expand Up @@ -55,6 +68,21 @@ udx__get_link_mtu (const struct sockaddr *addr) {
close(s);
return mtu;
}

// Variant for platforms that expose IP_MTU_DISCOVER / IPV6_MTU_DISCOVER.
//
// Switches the socket's path-MTU-discovery mode to the *_PMTUDISC_PROBE
// setting, using the IPv6 level, option and value when `is_ipv6` is true and
// the IPv4 ones otherwise.
//
// Returns whatever setsockopt() returns, unmodified.
int
udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) {
  // Start from the IPv4 triple and override it for IPv6 sockets.
  int level = IPPROTO_IP;
  int option = IP_MTU_DISCOVER;
  int mode = IP_PMTUDISC_PROBE;

  if (is_ipv6) {
    level = IPPROTO_IPV6;
    option = IPV6_MTU_DISCOVER;
    mode = IPV6_PMTUDISC_PROBE;
  }

  return setsockopt(fd, level, option, &mode, sizeof(mode));
}

#endif

ssize_t
Expand Down Expand Up @@ -111,11 +139,13 @@ udx__recvmsg (udx_socket_t *handle, uv_buf_t *buf, struct sockaddr *addr, int ad

for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&h); cmsg != NULL; cmsg = CMSG_NXTHDR(&h, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_RXQ_OVFL) {
packets_dropped_by_kernel = *(uint32_t *) CMSG_DATA(cmsg);
memcpy(&packets_dropped_by_kernel, CMSG_DATA(cmsg), sizeof(packets_dropped_by_kernel));
}
}

if (packets_dropped_by_kernel) {
uint32_t delta = packets_dropped_by_kernel - handle->packets_dropped_by_kernel;
handle->udx->packets_dropped_by_kernel += delta;
handle->packets_dropped_by_kernel = packets_dropped_by_kernel;
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/io_win.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,16 @@ udx__udp_set_rxq_ovfl (uv_os_sock_t fd) {
UDX_UNUSED(fd);
return -1;
}

// Windows variant: switches the socket's path-MTU-discovery mode to
// IP_PMTUDISC_PROBE for either address family.
//
// fd       - socket handle to configure.
// is_ipv6  - true to set IPV6_MTU_DISCOVER at the IPPROTO_IPV6 level, false
//            to set IP_MTU_DISCOVER at the IPPROTO_IP level.
//
// Returns whatever setsockopt() returns, unmodified.
int
udx__udp_set_dontfrag (uv_os_sock_t fd, bool is_ipv6) {
  // The same IP_PMTUDISC_* value is used for both families on purpose: the
  // Windows headers do not define separate IPV6_PMTUDISC_* constants.
  int val = IP_PMTUDISC_PROBE;
  int level = is_ipv6 ? IPPROTO_IPV6 : IPPROTO_IP;
  int option = is_ipv6 ? IPV6_MTU_DISCOVER : IP_MTU_DISCOVER;

  // Winsock declares optval as `const char *`; cast explicitly so the int
  // pointer is not passed as an incompatible pointer type.
  return setsockopt(fd, level, option, (const char *) &val, sizeof(val));
}
10 changes: 10 additions & 0 deletions src/udx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1956,6 +1956,7 @@ udx_init (uv_loop_t *loop, udx_t *udx) {
udx->packets_rx = 0;
udx->packets_tx = 0;

udx->packets_dropped_by_kernel = -1;
udx->loop = loop;

return 0;
Expand Down Expand Up @@ -2080,6 +2081,15 @@ udx_socket_bind (udx_socket_t *socket, const struct sockaddr *addr, unsigned int
if (!err) {
socket->cmsg_wanted = true;
socket->packets_dropped_by_kernel = 0;

if (socket->udx->packets_dropped_by_kernel == -1) {
socket->udx->packets_dropped_by_kernel = 0;
}
}

err = udx__udp_set_dontfrag((uv_os_sock_t) fd, socket->family == 6);
if (err) {
debug_printf("udx: failed to set IP Don't Fragment socket option\n");
}

socket->status |= UDX_SOCKET_BOUND;
Expand Down
2 changes: 2 additions & 0 deletions test/stream-write-read-perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ main () {
if (asock.packets_dropped_by_kernel != -1 && bsock.packets_dropped_by_kernel != -1) {
printf("stats: socket a: packets_dropped=%" PRIi64 "\n", asock.packets_dropped_by_kernel);
printf("stats: socket b: packets_dropped=%" PRIi64 "\n", bsock.packets_dropped_by_kernel);

assert(asock.packets_dropped_by_kernel + bsock.packets_dropped_by_kernel == udx.packets_dropped_by_kernel);
}

return 0;
Expand Down
2 changes: 1 addition & 1 deletion test/stream-write-read-receive-window.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ main () {
assert(e == 0);

int data_sz = UDX_MTU_MAX * 6;
uint8_t *data = malloc(data_sz);
char *data = malloc(data_sz);
uv_buf_t buf = uv_buf_init(data, data_sz);

e = udx_stream_write(req, &send_stream, &buf, 1, on_ack);
Expand Down

0 comments on commit b737b2d

Please sign in to comment.