Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Commit

Permalink
Receive-buffer
Browse files Browse the repository at this point in the history
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
  • Loading branch information
Christoph Paasch committed Sep 23, 2011
1 parent 91674b3 commit 17a4d79
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 31 deletions.
11 changes: 6 additions & 5 deletions include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,11 @@ struct multipath_pcb {
#define MPTCP_SUB_LEN_FAIL 8
#define MPTCP_SUB_LEN_FAIL_ALIGN 8

#ifdef DEBUG_PITOFLAG
static inline int PI_TO_FLAG(int pi)
static inline int mptcp_pi_to_flag(int pi)
{
BUG_ON(!pi);
return 1 << (pi - 1);
}
#else
#define PI_TO_FLAG(pi) (1 << (pi - 1))
#endif

/* Possible return values from mptcp_queue_skb */
#define MPTCP_EATEN 1 /* The skb has been (fully or partially) eaten by
Expand Down Expand Up @@ -481,6 +477,7 @@ void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk);
void mptcp_fin(struct multipath_pcb *mpcb);
void mptcp_retransmit_timer(struct sock *meta_sk);
void mptcp_mark_reinjected(struct sock *sk, struct sk_buff *skb);
struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk);

static inline int mptcp_skb_cloned(const struct sk_buff *skb,
const struct tcp_sock *tp)
Expand Down Expand Up @@ -966,6 +963,10 @@ static inline void mptcp_clean_rtx_infinite(struct sk_buff *skb,
struct sock *sk) {}
static inline void mptcp_retransmit_timer(struct sock *meta_sk) {}
static inline void mptcp_mark_reinjected(struct sock *sk, struct sk_buff *skb) {}
static inline struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk)
{
return NULL;
}
static inline void mptcp_set_rto(struct sock *sk) {}
static inline void mptcp_reset_xmit_timer(struct sock *meta_sk) {}
static inline void mptcp_send_fin(struct sock *meta_sk) {}
Expand Down
104 changes: 91 additions & 13 deletions net/ipv4/mptcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ static inline int mptcp_is_available(struct sock *sk)
return 0;

tp = tcp_sk(sk);
if (tp->pf || (tp->mpcb->noneligible & PI_TO_FLAG(tp->path_index)) ||
if (tp->pf || (tp->mpcb->noneligible & mptcp_pi_to_flag(tp->path_index)) ||
inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
return 0;
if (tcp_cwnd_test(tp))
Expand Down Expand Up @@ -247,7 +247,7 @@ static struct sock *get_available_subflow(struct multipath_pcb *mpcb,
/* If the skb has already been enqueued in this sk, try to find
* another one
*/
if (unlikely(PI_TO_FLAG(tp->path_index) & skb->path_mask))
if (skb && unlikely(mptcp_pi_to_flag(tp->path_index) & skb->path_mask))
continue;

if (!mptcp_is_available(sk))
Expand Down Expand Up @@ -299,7 +299,7 @@ static struct sock *rr_scheduler(struct multipath_pcb *mpcb,
/* If the skb has already been enqueued in this sk, try to find
* another one
*/
if (unlikely(PI_TO_FLAG(tp->path_index) & skb->path_mask))
if (unlikely(mptcp_pi_to_flag(tp->path_index) & skb->path_mask))
continue;

if (!mptcp_is_available(sk))
Expand All @@ -316,7 +316,7 @@ static struct sock *rr_scheduler(struct multipath_pcb *mpcb,
/* If the skb has already been enqueued in this sk,
* try to find another one.
*/
if (unlikely(PI_TO_FLAG(tp->path_index) & skb->path_mask))
if (unlikely(mptcp_pi_to_flag(tp->path_index) & skb->path_mask))
continue;

if (!mptcp_is_available(sk))
Expand Down Expand Up @@ -350,6 +350,7 @@ int sysctl_mptcp_ndiffports __read_mostly = 1;
int sysctl_mptcp_enabled __read_mostly = 1;
int sysctl_mptcp_scheduler __read_mostly = 1;
int sysctl_mptcp_checksum __read_mostly = 1;
int sysctl_mptcp_rbuf_opti __read_mostly = 0;

static ctl_table mptcp_table[] = {
{
Expand Down Expand Up @@ -389,6 +390,13 @@ static ctl_table mptcp_table[] = {
.extra1 = &mptcp_sched_min,
.extra2 = &mptcp_sched_max
},
{
.procname = "mptcp_rbuf_opti",
.data = &sysctl_mptcp_rbuf_opti,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{ }
};

Expand Down Expand Up @@ -461,7 +469,7 @@ int mptcp_init_subsockets(struct multipath_pcb *mpcb, u32 path_indices)
/* First, ensure that we keep existing path indices. */
mptcp_for_each_tp(mpcb, tp)
/* disable the corresponding bit of the existing subflow */
path_indices &= ~PI_TO_FLAG(tp->path_index);
path_indices &= ~mptcp_pi_to_flag(tp->path_index);

for (i = 0; i < sizeof(path_indices) * 8; i++) {
struct sock *sk, *meta_sk = (struct sock *)mpcb;
Expand Down Expand Up @@ -699,7 +707,7 @@ static int __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk,
continue;
/* If the skb has already been enqueued in this sk, try to find
* another one */
if (PI_TO_FLAG(tmp_tp->path_index) & orig_skb->path_mask)
if (mptcp_pi_to_flag(tmp_tp->path_index) & orig_skb->path_mask)
continue;

/* candidate subflow found, we can reinject */
Expand Down Expand Up @@ -760,7 +768,7 @@ void mptcp_reinject_data(struct sock *sk, int clone_it)
(tcb->flags & TCPHDR_FIN &&
!(tcb->mptcp_flags & MPTCPHDR_FIN)))
continue;
skb_it->path_mask |= PI_TO_FLAG(tp->path_index);
skb_it->path_mask |= mptcp_pi_to_flag(tp->path_index);
if (__mptcp_reinject_data(skb_it, meta_sk, sk, clone_it) < 0)
break;
tp->reinjected_seq = tcb->end_seq;
Expand Down Expand Up @@ -789,7 +797,8 @@ void mptcp_retransmit_timer(struct sock *meta_sk)
meta_icsk->icsk_rto, TCP_RTO_MAX * 2);
}

void mptcp_mark_reinjected(struct sock *sk, struct sk_buff *skb) {
void mptcp_mark_reinjected(struct sock *sk, struct sk_buff *skb)
{
struct sock *meta_sk;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb_it;
Expand All @@ -800,13 +809,65 @@ void mptcp_mark_reinjected(struct sock *sk, struct sk_buff *skb) {
meta_sk = mptcp_meta_sk(sk);
skb_it = tcp_write_queue_head(meta_sk);

while (skb_it && skb_it != tcp_send_head(meta_sk)) {
tcp_for_write_queue_from(skb_it, meta_sk) {
if (skb_it == tcp_send_head(meta_sk))
break;

if (TCP_SKB_CB(skb_it)->data_seq == TCP_SKB_CB(skb)->data_seq) {
skb_it->path_mask |= PI_TO_FLAG(tp->path_index);
skb_it->path_mask |= mptcp_pi_to_flag(tp->path_index);
break;
}
}
}

struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk)
{
struct sock *meta_sk, *sk_it;
struct tcp_sock *tp = tcp_sk(sk), *tp_it;
struct sk_buff *skb_it;

if (!tp->mpc || !sysctl_mptcp_rbuf_opti)
return NULL;

meta_sk = mptcp_meta_sk(sk);
skb_it = tcp_write_queue_head(meta_sk);

if (!skb_it)
return NULL;

/* Half the cwnd of the slow flow */
mptcp_for_each_sk(tp->mpcb, sk_it, tp_it) {
if (skb_it->path_mask & mptcp_pi_to_flag(tp_it->path_index)) {
u64 bw1, bw2;

/* Don't half our own cwnd */
if (tp_it == tp)
continue;

bw1 = (u64) tp_it->snd_cwnd << 32;
bw1 = div64_u64(bw1, tp_it->srtt);
bw2 = (u64) tp->snd_cwnd << 32;
bw2 = div64_u64(bw2, tp->srtt);

if (bw1 < bw2) {
tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
tp_it->snd_ssthresh = max(tp_it->snd_cwnd, 2U);
}
break;
}
skb_it = tcp_write_queue_next(meta_sk, skb_it);
}

/* Now, find a segment to reinject */
tcp_for_write_queue_from(skb_it, meta_sk) {
if (skb_it == tcp_send_head(meta_sk))
break;

/* Segment not yet injected into this path? Take it!!! */
if (!(skb_it->path_mask & mptcp_pi_to_flag(tp->path_index)))
return skb_it;
}

return NULL;
}


Expand Down Expand Up @@ -2385,7 +2446,9 @@ void mptcp_update_sndbuf(struct multipath_pcb *mpcb)
* (chooses the reinject queue if any segment is waiting in it, otherwise,
* chooses the normal write queue).
* Sets *@reinject to 1 if the returned segment comes from the
* reinject queue. Otherwise sets @reinject to 0.
* reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
* and sets it to -1 if it is a meta-level retransmission to optimize the
* receive-buffer.
*/
struct sk_buff *mptcp_next_segment(struct sock *sk, int *reinject)
{
Expand All @@ -2401,7 +2464,22 @@ struct sk_buff *mptcp_next_segment(struct sock *sk, int *reinject)
*reinject = 1;
return skb;
} else {
return tcp_send_head(sk);
skb = tcp_send_head(sk);

if (!skb && !sk_stream_memory_free(sk)) {
struct sock *subsk;
subsk = mptcp_schedulers[sysctl_mptcp_scheduler - 1](mpcb, NULL);

if (!subsk)
return NULL;

skb = mptcp_rcv_buf_optimization(subsk);
if (skb) {
if (reinject)
*reinject = -1;
}
}
return skb;
}
}

Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/mptcp_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,11 +694,11 @@ static void __mptcp_send_updatenotif(struct multipath_pcb *mpcb)
return;

for (i = 0; i < mpcb->pa4_size; i++)
path_indices |= PI_TO_FLAG(mpcb->pa4[i].path_index);
path_indices |= mptcp_pi_to_flag(mpcb->pa4[i].path_index);

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
for (i = 0; i < mpcb->pa6_size; i++)
path_indices |= PI_TO_FLAG(mpcb->pa6[i].path_index);
path_indices |= mptcp_pi_to_flag(mpcb->pa6[i].path_index);
#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
mptcp_init_subsockets(mpcb, path_indices);
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_coupled.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ static void mptcp_recalc_alpha(struct sock *sk)
* Integer-overflow is not possible here, because
* tmp will be in u64.
*/
tmp = div64_u64 (mptcp_ccc_scale(sub_tp->snd_cwnd,
tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd,
alpha_scale_num), rtt * rtt);

if (tmp >= max_numerator) {
Expand Down
26 changes: 16 additions & 10 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -2301,7 +2301,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
struct sk_buff *subskb = NULL;
int err;

if (reinject && !after(mptcp_skb_end_data_seq(skb),
if (reinject > 0 && !after(mptcp_skb_end_data_seq(skb),
tp->snd_una)) {
/* another copy of the segment already reached
* the peer, just discard this one
Expand Down Expand Up @@ -2333,7 +2333,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* This must be invoked even if we don't want
* to support TSO at the moment
*/

retry:
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
/* At the moment we do not support tso, hence
Expand All @@ -2354,8 +2354,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}

if (unlikely(!tcp_snd_wnd_test(subtp, skb, mss_now)))
if (unlikely(!tcp_snd_wnd_test(subtp, skb, mss_now))) {
skb = mptcp_rcv_buf_optimization(subsk);
if (skb) {
reinject = -1;
goto retry;
}
break;
}

if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
Expand Down Expand Up @@ -2385,17 +2391,17 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
#else
if (sk != subsk) {
if (tp->path_index)
skb->path_mask |= PI_TO_FLAG(tp->path_index);
if (subtp->path_index)
skb->path_mask |= mptcp_pi_to_flag(subtp->path_index);
/* If the segment is reinjected, the clone is done
* already
*/
if (!reinject) {
if (reinject <= 0) {
/* The segment may be a meta-level
* retransmission. In this case, we also have to
* copy the TCP/IP-headers. (pskb_copy)
*/
if (unlikely(skb->path_mask & ~PI_TO_FLAG(tp->path_index)))
if (unlikely(skb->path_mask & ~mptcp_pi_to_flag(subtp->path_index)))
subskb = pskb_copy(skb, GFP_ATOMIC);
else
subskb = skb_clone(skb, GFP_ATOMIC);
Expand Down Expand Up @@ -2468,7 +2474,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
subtp->write_seq -= subskb->len;
mptcp_wmem_free_skb(subsk, subskb);

tp->mpcb->noneligible |= PI_TO_FLAG(subtp->path_index);
tp->mpcb->noneligible |= mptcp_pi_to_flag(subtp->path_index);

continue;
}
Expand All @@ -2493,7 +2499,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
BUG_ON(tcp_send_head(sk) != skb);
tcp_event_new_data_sent(sk, skb);
}
if (sk!= subsk && reinject) {
if (sk!= subsk && reinject > 0) {
mptcp_mark_reinjected(subsk, skb);
}

Expand Down Expand Up @@ -2583,7 +2589,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
skb = mptcp_next_segment(sk, &reinject);
BUG_ON(!skb);

while (reinject && !after(TCP_SKB_CB(skb)->end_data_seq, tp->snd_una)) {
while (reinject > 0&& !after(TCP_SKB_CB(skb)->end_data_seq, tp->snd_una)) {
/* another copy of the segment already reached
* the peer, just discard this one.
*/
Expand Down

0 comments on commit 17a4d79

Please sign in to comment.