From c8071377b0ac744679ef54120fafda52c7064700 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 28 May 2019 15:16:15 -0700 Subject: [PATCH] mptcp: Correctly set the tcp_tsorted_anchor on skbs Since v4.15, the stack is using a list in skbs to handle the retransmissions. MPTCP does not really need it, but it needs to set/reset the list correctly on the skb. Thus, make sure we add and remove the skbs from the list with a call to tcp_update_skb_after_send() and list_del(&skb->tcp_tsorted_anchor). We also need to call INIT_LIST_HEAD after calls to tcp_rtx_queue_unlink() because the latter sets all pointers to NULL. Fixes: 6a9aee5df892 ("mptcp: Fix tsorted_anchor usage") Signed-off-by: Christoph Paasch Signed-off-by: Matthieu Baerts (cherry picked from commit 5919dd53cdd13718cafeeccf1418d75b8eedc672) Signed-off-by: Matthieu Baerts Conflicts: net/ipv4/tcp_output.c net/mptcp/mptcp_output.c --- include/linux/skbuff.h | 5 ----- include/net/tcp.h | 1 + net/ipv4/tcp_output.c | 2 +- net/mptcp/mptcp_input.c | 17 +++++++++++++---- net/mptcp/mptcp_output.c | 12 ++++++++---- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e5bc3d667252..2da4466dafcf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3227,11 +3227,6 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) for (skb = skb_rb_first(root); skb != NULL; \ skb = skb_rb_next(skb)) -#define skb_rbtree_walk_safe(skb, root, tmp) \ - for (skb = skb_rb_first(root); \ - tmp = skb ?
skb_rb_next(skb) : NULL, (skb != NULL); \ - skb = tmp) - #define skb_rbtree_walk_from(skb) \ for (; skb != NULL; \ skb = skb_rb_next(skb)) diff --git a/include/net/tcp.h b/include/net/tcp.h index 33307aaa734b..06cdbfdc70d5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -378,6 +378,7 @@ bool retransmits_timed_out(struct sock *sk, unsigned int timeout); void tcp_write_err(struct sock *sk); void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr); +void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb); void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now); void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 316c60888625..08dae8db3732 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1032,7 +1032,7 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) sock_hold(sk); } -static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) +void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) { skb->skb_mstamp = tp->tcp_mstamp; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c index 510cc49933e9..5e18891cbdb5 100644 --- a/net/mptcp/mptcp_input.c +++ b/net/mptcp/mptcp_input.c @@ -596,15 +596,19 @@ static void mptcp_restart_sending(struct sock *meta_sk) { struct tcp_sock *meta_tp = tcp_sk(meta_sk); struct mptcp_cb *mpcb = meta_tp->mpcb; - struct sk_buff *skb, *tmp, *wq_head; + struct sk_buff *wq_head, *skb, *tmp; + + skb = tcp_rtx_queue_head(meta_sk); /* We resend everything that has not been acknowledged, thus we need * to move it from the rtx-tree to the write-queue. 
*/ wq_head = tcp_write_queue_head(meta_sk); - skb_rbtree_walk_safe(skb, &meta_sk->tcp_rtx_queue, tmp) { + + skb_rbtree_walk_from_safe(skb, tmp) { list_del(&skb->tcp_tsorted_anchor); tcp_rtx_queue_unlink(skb, meta_sk); + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); if (wq_head) __skb_queue_before(&meta_sk->sk_write_queue, wq_head, skb); @@ -2185,14 +2189,16 @@ static void mptcp_rcv_synsent_fastopen(struct sock *meta_sk) { struct tcp_sock *meta_tp = tcp_sk(meta_sk); struct tcp_sock *master_tp = tcp_sk(meta_tp->mpcb->master_sk); - struct sk_buff *skb, *tmp; u32 new_mapping = meta_tp->write_seq - master_tp->snd_una; + struct sk_buff *skb, *tmp; + + skb = tcp_rtx_queue_head(meta_sk); /* There should only be one skb in write queue: the data not * acknowledged in the SYN+ACK. In this case, we need to map * this data to data sequence numbers. */ - skb_rbtree_walk_safe(skb, &meta_sk->tcp_rtx_queue, tmp) { + skb_rbtree_walk_from_safe(skb, tmp) { /* If the server only acknowledges partially the data sent in * the SYN, we need to trim the acknowledged part because * we don't want to retransmit this already received data. 
@@ -2217,7 +2223,10 @@ static void mptcp_rcv_synsent_fastopen(struct sock *meta_sk) TCP_SKB_CB(skb)->seq += new_mapping; TCP_SKB_CB(skb)->end_seq += new_mapping; + list_del(&skb->tcp_tsorted_anchor); tcp_rtx_queue_unlink(skb, meta_sk); + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + tcp_add_write_queue_tail(meta_sk, skb); } diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c index 8d4c0a2fdde2..f1339436536f 100644 --- a/net/mptcp/mptcp_output.c +++ b/net/mptcp/mptcp_output.c @@ -143,6 +143,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk } else { list_del(&orig_skb->tcp_tsorted_anchor); tcp_rtx_queue_unlink(orig_skb, sk); + INIT_LIST_HEAD(&orig_skb->tcp_tsorted_anchor); } sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= orig_skb->truesize; @@ -319,8 +320,10 @@ void mptcp_reinject_data(struct sock *sk, int clone_it) TCP_FRAG_IN_WRITE_QUEUE); } - skb_rbtree_walk_safe(skb_it, &sk->tcp_rtx_queue, tmp) { + skb_it = tcp_rtx_queue_head(sk); + skb_rbtree_walk_from_safe(skb_it, tmp) { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it); + /* Subflow syn's and fin's are not reinjected. * * As well as empty subflow-fins with a data-fin. 
@@ -704,7 +707,7 @@ int mptcp_write_wakeup(struct sock *meta_sk, int mib) tcp_event_new_data_sent(meta_sk, skb); __tcp_push_pending_frames(subsk, mss, TCP_NAGLE_PUSH); - skb->skb_mstamp = meta_tp->tcp_mstamp; + tcp_update_skb_after_send(meta_tp, skb); meta_tp->lsndtime = tcp_jiffies32; return 0; @@ -852,7 +855,8 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, * always push on the subflow */ __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH); - skb->skb_mstamp = meta_tp->tcp_mstamp; + if (reinject <= 0) + tcp_update_skb_after_send(meta_tp, skb); meta_tp->lsndtime = tcp_jiffies32; path_mask |= mptcp_pi_to_flag(subtp->mptcp->path_index); @@ -1589,7 +1593,7 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb) } __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH); - skb->skb_mstamp = meta_tp->tcp_mstamp; + tcp_update_skb_after_send(meta_tp, skb); meta_tp->lsndtime = tcp_jiffies32; return 0;