From 6719331c2f736fba881f2ce85c8b0e37fe054c46 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 14 Sep 2020 10:01:09 +0200 Subject: [PATCH] mptcp: trigger msk processing even for OoO data This is a prerequisite to allow receiving data from multiple subflows without re-injection. Instead of dropping the OoO - "future" data in subflow_check_data_avail(), call into __mptcp_move_skbs() and let the msk drop that. To avoid code duplication factor out the mptcp_subflow_discard_data() helper. Note that __mptcp_move_skbs() can now find multiple subflows with data avail (comprising to-be-discarded data), so must update the byte counter incrementally. v1 -> v2: - fix checkpatch issues (unsigned -> unsigned int) Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 33 ++++++++++++++++++---- net/mptcp/protocol.h | 9 +++++- net/mptcp/subflow.c | 78 +++++++++++++++++++++++++++++----------------------- 3 files changed, 78 insertions(+), 42 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 854a8b3b9ecd..95573c6f7762 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -167,7 +167,8 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk, return true; subflow->data_avail = 0; - return mptcp_subflow_data_available(ssk); + mptcp_subflow_data_available(ssk); + return subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; } static void mptcp_check_data_fin_ack(struct sock *sk) @@ -313,11 +314,18 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct tcp_sock *tp; bool done = false; - if (!mptcp_subflow_dsn_valid(msk, ssk)) { - *bytes = 0; + pr_debug("msk=%p ssk=%p data avail=%d valid=%d empty=%d", + msk, ssk, subflow->data_avail, + mptcp_subflow_dsn_valid(msk, ssk), + !skb_peek(&ssk->sk_receive_queue)); + if (subflow->data_avail == MPTCP_SUBFLOW_OOO_DATA) { + mptcp_subflow_discard_data(ssk, subflow->map_data_len); return false; } + if (!mptcp_subflow_dsn_valid(msk, ssk)) + return false; + tp = tcp_sk(ssk); do { u32 map_remaining, offset; @@ -376,7 +384,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } } while (more_data_avail); - *bytes = moved; + *bytes += moved; /* If the moves have caught up with the DATA_FIN sequence number * it's time to ack the DATA_FIN and change socket state, but @@ -415,9 +423,17 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) void mptcp_data_ready(struct sock *sk, struct sock *ssk) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); + bool wake; - set_bit(MPTCP_DATA_READY, &msk->flags); + /* move_skbs_to_msk below can legitly clear the data_avail flag, + * but we will need later to properly woke the reader, cache its + * value + */ + wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; + if (wake) + set_bit(MPTCP_DATA_READY, &msk->flags); if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) && move_skbs_to_msk(msk, ssk)) @@ -438,7 +454,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) move_skbs_to_msk(msk, ssk); } wake: - sk->sk_data_ready(sk); + if (wake) + sk->sk_data_ready(sk); } static void __mptcp_flush_join_list(struct mptcp_sock *msk) @@ -1281,6 +1298,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, set_bit(MPTCP_DATA_READY, &msk->flags); } out_err: + pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", + msk, test_bit(MPTCP_DATA_READY, &msk->flags), + skb_queue_empty(&sk->sk_receive_queue), copied); mptcp_rcv_space_adjust(msk, copied); release_sock(sk); @@ -2308,6 +2328,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); state = inet_sk_state_load(sk); + pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); if (state == TCP_LISTEN) return mptcp_check_readable(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60b27d44c184..794997852c5e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -268,6 +268,12 @@ mptcp_subflow_rsk(const struct request_sock *rsk) return (struct mptcp_subflow_request_sock *)rsk; } +enum mptcp_data_avail { + MPTCP_SUBFLOW_NODATA, + MPTCP_SUBFLOW_DATA_AVAIL, + MPTCP_SUBFLOW_OOO_DATA +}; + /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ @@ -292,10 +298,10 @@ struct mptcp_subflow_context { map_valid : 1, mpc_map : 1, backup : 1, - data_avail : 1, rx_eof : 1, use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ can_ack : 1; /* only after processing the remote a key */ + enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -347,6 +353,7 @@ int mptcp_is_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); +int mptcp_subflow_discard_data(struct sock *sk, unsigned int limit); void __init mptcp_subflow_init(void); /* called with sk socket lock held */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 53b455c3c229..071ee54b3c9f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -816,6 +816,40 @@ static int subflow_read_actor(read_descriptor_t *desc, return copy_len; } +int mptcp_subflow_discard_data(struct sock *ssk, unsigned int limit) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + u32 map_remaining; + size_t delta; + + map_remaining = subflow->map_data_len - + mptcp_subflow_get_map_offset(subflow); + delta = min_t(size_t, limit, map_remaining); + + /* discard mapped data */ + pr_debug("discarding %zu bytes, current map len=%d", delta, + map_remaining); + if (delta) { + read_descriptor_t desc = { + .count = delta, + }; + int ret; + + ret = tcp_read_sock(ssk, &desc, subflow_read_actor); + if (ret < 0) { + ssk->sk_err = -ret; + return ret; + } + if (ret < delta) + return 0; + if (delta == map_remaining) { + subflow->data_avail = 0; + subflow->map_valid = 0; + } + } + return 0; +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -832,8 +866,6 @@ static bool subflow_check_data_avail(struct sock *ssk) msk = mptcp_sk(subflow->conn); for (;;) { - u32 map_remaining; - size_t delta; u64 ack_seq; u64 old_ack; @@ -851,7 +883,7 @@ static bool subflow_check_data_avail(struct sock *ssk) subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - subflow->data_avail = 1; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; return true; } @@ -880,43 +912,19 @@ static bool subflow_check_data_avail(struct sock *ssk) pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); if (ack_seq == old_ack) { - subflow->data_avail = 1; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + break; + } else if (after64(ack_seq, old_ack)) { + subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; break; } /* only accept in-sequence mapping. Old values are spurious - * retransmission; we can hit "future" values on active backup - * subflow switch, we relay on retransmissions to get - * in-sequence data. - * Cuncurrent subflows support will require subflow data - * reordering + * retransmission */ - map_remaining = subflow->map_data_len - - mptcp_subflow_get_map_offset(subflow); - if (before64(ack_seq, old_ack)) - delta = min_t(size_t, old_ack - ack_seq, map_remaining); - else - delta = min_t(size_t, ack_seq - old_ack, map_remaining); - - /* discard mapped data */ - pr_debug("discarding %zu bytes, current map len=%d", delta, - map_remaining); - if (delta) { - read_descriptor_t desc = { - .count = delta, - }; - int ret; - - ret = tcp_read_sock(ssk, &desc, subflow_read_actor); - if (ret < 0) { - ssk->sk_err = -ret; - goto fatal; - } - if (ret < delta) - return false; - if (delta == map_remaining) - subflow->map_valid = 0; - } + if (mptcp_subflow_discard_data(ssk, old_ack - ack_seq)) + goto fatal; + return false; } return true; -- 2.11.0