Skip to content

Commit

Permalink
mptcp: implement wmem reservation
Browse files Browse the repository at this point in the history
This leverages the previous commit to reserve the wmem
required for the sendmsg() operation when the msk socket
lock is first acquired.
Some heuristics are used to get a reasonable [over] estimation of
the whole memory required. If we can't forward alloc that amount,
fall back to a reasonably small chunk; if that fails too, enter the
wait for memory path.

When sendmsg() needs more memory it looks at wmem_reserved
first and, if that is exhausted, moves more space from
sk_forward_alloc.

The reserved memory is a transient state and is released at the
next socket unlock via the release_cb().

Overall this will simplify the next patch.

Acked-by: Florian Westphal <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
  • Loading branch information
Paolo Abeni authored and jenkins-tessares committed Nov 20, 2020
1 parent 5fb346f commit 20e1cdd
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 7 deletions.
92 changes: 85 additions & 7 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,81 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}

static int mptcp_wmem_with_overhead(int size)
{
return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
}

/* Move the write memory estimated for sending @size bytes from
 * sk->sk_forward_alloc into msk->wmem_reserved.
 *
 * The estimate comes from mptcp_wmem_with_overhead(). When it is not
 * already available in sk_forward_alloc and cannot be scheduled in full,
 * a single PAGE_SIZE is tried instead. If that fails as well,
 * wmem_reserved is set to -1, which mptcp_wmem_alloc() reports as an
 * allocation failure.
 */
static void __mptcp_wmem_reserve(struct sock *sk, int size)
{
	int amount = mptcp_wmem_with_overhead(size);
	struct mptcp_sock *msk = mptcp_sk(sk);

	/* a reservation left over from a previous lock section is a bug */
	WARN_ON_ONCE(msk->wmem_reserved);

	/* the sendmsg() length is a size_t narrowed to int here: a negative
	 * estimate would pass the check below, inflate sk_forward_alloc and
	 * leave a negative wmem_reserved that is only zeroed, never returned
	 */
	if (WARN_ON_ONCE(amount < 0))
		amount = 0;

	if (amount <= sk->sk_forward_alloc)
		goto reserve;

	/* under memory pressure try to reserve at most a single page
	 * otherwise try to reserve the full estimate and fallback
	 * to a single page before entering the error path
	 */
	if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
	    !sk_wmem_schedule(sk, amount)) {
		/* the estimate already was a page or less: nothing smaller to try */
		if (amount <= PAGE_SIZE)
			goto nomem;

		amount = PAGE_SIZE;
		if (!sk_wmem_schedule(sk, amount))
			goto nomem;
	}

reserve:
	msk->wmem_reserved = amount;
	sk->sk_forward_alloc -= amount;
	return;

nomem:
	/* we will wait for memory on next allocation */
	msk->wmem_reserved = -1;
}

/* Drop the transient wmem reservation: a positive leftover is given back
 * to sk->sk_forward_alloc, a negative (error) marker is simply cleared.
 */
static void __mptcp_update_wmem(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	int leftover = msk->wmem_reserved;

	/* nothing reserved and no error recorded */
	if (leftover == 0)
		return;

	/* only a real reservation carries memory to hand back */
	if (leftover > 0)
		sk->sk_forward_alloc += leftover;

	msk->wmem_reserved = 0;
}

/* Account @size bytes against msk->wmem_reserved, topping the reserve up
 * from sk->sk_forward_alloc when it is too small.
 *
 * Returns false if a previous reservation failure is pending
 * (wmem_reserved < 0) or if sk_wmem_schedule() refuses @size bytes;
 * true once the bytes have been deducted from the reserve.
 */
static bool mptcp_wmem_alloc(struct sock *sk, int size)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	/* a negative reserve marks an earlier failed reservation */
	if (msk->wmem_reserved < 0)
		return false;

	if (msk->wmem_reserved < size) {
		/* reserve is short: pull the whole request from forward alloc */
		if (!sk_wmem_schedule(sk, size))
			return false;

		sk->sk_forward_alloc -= size;
		msk->wmem_reserved += size;
	}

	/* consume the request from the reserve */
	msk->wmem_reserved -= size;
	return true;
}

static void dfrag_uncharge(struct sock *sk, int len)
{
sk_mem_uncharge(sk, len);
Expand Down Expand Up @@ -904,7 +979,7 @@ static void mptcp_clean_una(struct sock *sk)
}

out:
if (cleaned)
if (cleaned && tcp_under_memory_pressure(sk))
sk_mem_reclaim_partial(sk);
}

Expand Down Expand Up @@ -1281,7 +1356,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -EOPNOTSUPP;

lock_sock(sk);
mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len));

timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

Expand Down Expand Up @@ -1330,11 +1405,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
offset = dfrag->offset + dfrag->data_len;
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
if (!sk_wmem_schedule(sk, psize + frag_truesize))
if (!mptcp_wmem_alloc(sk, psize + frag_truesize))
goto wait_for_memory;

if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
msk->wmem_reserved += psize + frag_truesize;
ret = -EFAULT;
goto out;
}
Expand All @@ -1350,7 +1426,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
* Note: we charge such data both to sk and ssk
*/
sk_wmem_queued_add(sk, frag_truesize);
sk->sk_forward_alloc -= frag_truesize;
if (!dfrag_collapsed) {
get_page(dfrag->page);
list_add_tail(&dfrag->list, &msk->rtx_queue);
Expand Down Expand Up @@ -1971,6 +2046,7 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_WORK(&msk->work, mptcp_worker);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;

msk->ack_hint = NULL;
msk->first = NULL;
Expand Down Expand Up @@ -2165,6 +2241,7 @@ static void __mptcp_destroy_sock(struct sock *sk)

sk->sk_prot->destroy(sk);

WARN_ON_ONCE(msk->wmem_reserved);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
Expand Down Expand Up @@ -2510,13 +2587,14 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,

#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)

/* this is very alike tcp_release_cb() but we must handle differently a
* different set of events
*/
/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)
{
unsigned long flags, nflags;

/* clear any wmem reservation and errors */
__mptcp_update_wmem(sk);

do {
flags = sk->sk_tsq_flags;
if (!(flags & MPTCP_DEFERRED_ALL))
Expand Down
1 change: 1 addition & 0 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ struct mptcp_sock {
u64 ack_seq;
u64 rcv_wnd_sent;
u64 rcv_data_fin_seq;
int wmem_reserved;
struct sock *last_snd;
int snd_burst;
int old_wspace;
Expand Down

0 comments on commit 20e1cdd

Please sign in to comment.