Skip to content

Commit 29b5e5e

Browse files
Paolo Abeni authored and davem330 committed
mptcp: implement TCP_NOTSENT_LOWAT support
Add support for this socket option by storing the user-space provided value in a new msk field, and use that value to implement the mptcp_stream_memory_free() helper, similar to the TCP one.

To avoid adding more indirect calls in the fast path, open-code a variant of sk_stream_memory_free() in mptcp_sendmsg() and add direct calls to the mptcp stream memory free helper where possible.

Closes: multipath-tcp/mptcp_net-next#464
Signed-off-by: Paolo Abeni <[email protected]>
Reviewed-by: Mat Martineau <[email protected]>
Signed-off-by: Matthieu Baerts (NGI0) <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent a747626 commit 29b5e5e

File tree

3 files changed

+73
-6
lines changed

3 files changed

+73
-6
lines changed

net/mptcp/protocol.c

+34-5
Original file line numberDiff line numberDiff line change
@@ -1762,6 +1762,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy,
17621762
return 0;
17631763
}
17641764

1765+
/* open-code sk_stream_memory_free() plus sent limit computation to
1766+
* avoid indirect calls in fast-path.
1767+
* Called under the msk socket lock, so we can avoid a bunch of ONCE
1768+
* annotations.
1769+
*/
1770+
static u32 mptcp_send_limit(const struct sock *sk)
1771+
{
1772+
const struct mptcp_sock *msk = mptcp_sk(sk);
1773+
u32 limit, not_sent;
1774+
1775+
if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf))
1776+
return 0;
1777+
1778+
limit = mptcp_notsent_lowat(sk);
1779+
if (limit == UINT_MAX)
1780+
return UINT_MAX;
1781+
1782+
not_sent = msk->write_seq - msk->snd_nxt;
1783+
if (not_sent >= limit)
1784+
return 0;
1785+
1786+
return limit - not_sent;
1787+
}
1788+
17651789
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17661790
{
17671791
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1806,16 +1830,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
18061830
struct mptcp_data_frag *dfrag;
18071831
bool dfrag_collapsed;
18081832
size_t psize, offset;
1833+
u32 copy_limit;
1834+
1835+
/* ensure fitting the notsent_lowat() constraint */
1836+
copy_limit = mptcp_send_limit(sk);
1837+
if (!copy_limit)
1838+
goto wait_for_memory;
18091839

18101840
/* reuse tail pfrag, if possible, or carve a new one from the
18111841
* page allocator
18121842
*/
18131843
dfrag = mptcp_pending_tail(sk);
18141844
dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
18151845
if (!dfrag_collapsed) {
1816-
if (!sk_stream_memory_free(sk))
1817-
goto wait_for_memory;
1818-
18191846
if (!mptcp_page_frag_refill(sk, pfrag))
18201847
goto wait_for_memory;
18211848

@@ -1830,6 +1857,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
18301857
offset = dfrag->offset + dfrag->data_len;
18311858
psize = pfrag->size - offset;
18321859
psize = min_t(size_t, psize, msg_data_left(msg));
1860+
psize = min_t(size_t, psize, copy_limit);
18331861
total_ts = psize + frag_truesize;
18341862

18351863
if (!sk_wmem_schedule(sk, total_ts))
@@ -3760,6 +3788,7 @@ static struct proto mptcp_prot = {
37603788
.unhash = mptcp_unhash,
37613789
.get_port = mptcp_get_port,
37623790
.forward_alloc_get = mptcp_forward_alloc_get,
3791+
.stream_memory_free = mptcp_stream_memory_free,
37633792
.sockets_allocated = &mptcp_sockets_allocated,
37643793

37653794
.memory_allocated = &tcp_memory_allocated,
@@ -3933,12 +3962,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
39333962
{
39343963
struct sock *sk = (struct sock *)msk;
39353964

3936-
if (sk_stream_is_writeable(sk))
3965+
if (__mptcp_stream_is_writeable(sk, 1))
39373966
return EPOLLOUT | EPOLLWRNORM;
39383967

39393968
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
39403969
smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */
3941-
if (sk_stream_is_writeable(sk))
3970+
if (__mptcp_stream_is_writeable(sk, 1))
39423971
return EPOLLOUT | EPOLLWRNORM;
39433972

39443973
return 0;

net/mptcp/protocol.h

+27-1
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ struct mptcp_sock {
307307
in_accept_queue:1,
308308
free_first:1,
309309
rcvspace_init:1;
310+
u32 notsent_lowat;
310311
struct work_struct work;
311312
struct sk_buff *ooo_last_skb;
312313
struct rb_root out_of_order_queue;
@@ -807,11 +808,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
807808
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
808809
}
809810

811+
static inline u32 mptcp_notsent_lowat(const struct sock *sk)
812+
{
813+
struct net *net = sock_net(sk);
814+
u32 val;
815+
816+
val = READ_ONCE(mptcp_sk(sk)->notsent_lowat);
817+
return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
818+
}
819+
820+
static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
821+
{
822+
const struct mptcp_sock *msk = mptcp_sk(sk);
823+
u32 notsent_bytes;
824+
825+
notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);
826+
return (notsent_bytes << wake) < mptcp_notsent_lowat(sk);
827+
}
828+
829+
static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
830+
{
831+
return mptcp_stream_memory_free(sk, wake) &&
832+
__sk_stream_is_writeable(sk, wake);
833+
}
834+
810835
/* Issue a full memory barrier, then call sk_stream_write_space() on @sk.
 *
 * This span is a diff hunk: the line marked "814-" is the removed
 * unconditional call, and the lines marked "839+"/"840+" replace it with
 * a call made only when mptcp_stream_memory_free(sk, 1) returns true.
 */
static inline void mptcp_write_space(struct sock *sk)
811836
{
812837
/* pairs with memory barrier in mptcp_poll */
813838
smp_mb();
814-
sk_stream_write_space(sk);
839+
/* skip the notification while the not-sent backlog check fails */
if (mptcp_stream_memory_free(sk, 1))
840+
sk_stream_write_space(sk);
815841
}
816842

817843
static inline void __mptcp_sync_sndbuf(struct sock *sk)

net/mptcp/sockopt.c

+12
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,16 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
812812
return 0;
813813
case TCP_ULP:
814814
return -EOPNOTSUPP;
815+
case TCP_NOTSENT_LOWAT:
816+
ret = mptcp_get_int_option(msk, optval, optlen, &val);
817+
if (ret)
818+
return ret;
819+
820+
lock_sock(sk);
821+
WRITE_ONCE(msk->notsent_lowat, val);
822+
mptcp_write_space(sk);
823+
release_sock(sk);
824+
return 0;
815825
case TCP_CONGESTION:
816826
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
817827
case TCP_CORK:
@@ -1345,6 +1355,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
13451355
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
13461356
case TCP_NODELAY:
13471357
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
1358+
case TCP_NOTSENT_LOWAT:
1359+
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
13481360
}
13491361
return -EOPNOTSUPP;
13501362
}

0 commit comments

Comments
 (0)