tcp: add a force_schedule argument to sk_stream_alloc_skb()
In commit 8e4d980 ("tcp: fix behavior for epoll edge trigger")
we fixed a possible hang of TCP sockets under memory pressure
by allowing sk_stream_alloc_skb() to use sk_forced_mem_schedule()
if no packet is in the socket write queue.

It turns out there are other cases where we want to force memory
scheduling:

tcp_fragment() & tso_fragment() need to split a big TSO packet into
two smaller ones. If we block here because of TCP memory pressure,
we can effectively prevent the TCP socket from sending any new data.
If no further ACK arrives, the hang is permanent, and the socket
never gets a chance to reduce its memory usage.
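
In short, force_schedule lets a caller bypass the normal
sk_wmem_schedule() admission check when an allocation failure would
stall the socket. A condensed sketch of the resulting allocation path,
abridged from the tcp.c hunk below (error handling and skb sizing
omitted):

	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
	if (likely(skb)) {
		bool mem_scheduled;

		if (force_schedule) {
			/* Caller must make forward progress (e.g. when
			 * splitting an already-queued skb): charge the
			 * memory unconditionally. */
			mem_scheduled = true;
			sk_forced_mem_schedule(sk, skb->truesize);
		} else {
			/* Normal admission check; may fail under TCP
			 * memory pressure. */
			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
		}
		/* ... use skb only if mem_scheduled ... */
	}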

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Eric Dumazet authored and davem330 committed May 21, 2015
1 parent 765c9c6 commit eb93447
Showing 3 changed files with 18 additions and 14 deletions.
3 changes: 2 additions & 1 deletion include/net/sock.h
@@ -2025,7 +2025,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 	}
 }
 
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+				    bool force_schedule);
 
 /**
  * sk_page_frag - return an appropriate page_frag
19 changes: 11 additions & 8 deletions net/ipv4/tcp.c
@@ -808,7 +808,8 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+				    bool force_schedule)
 {
 	struct sk_buff *skb;
 
@@ -820,15 +821,15 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
 	if (likely(skb)) {
-		bool mem_schedule;
+		bool mem_scheduled;
 
-		if (skb_queue_len(&sk->sk_write_queue) == 0) {
-			mem_schedule = true;
+		if (force_schedule) {
+			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
 		} else {
-			mem_schedule = sk_wmem_schedule(sk, skb->truesize);
+			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
 		}
-		if (likely(mem_schedule)) {
+		if (likely(mem_scheduled)) {
 			skb_reserve(skb, sk->sk_prot->max_header);
 			/*
 			 * Make sure that we have exactly size bytes
@@ -918,7 +919,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
 
-		skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+		skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+					  skb_queue_empty(&sk->sk_write_queue));
 		if (!skb)
 			goto wait_for_memory;
 
@@ -1154,7 +1156,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg),
-						  sk->sk_allocation);
+						  sk->sk_allocation,
+						  skb_queue_empty(&sk->sk_write_queue));
 			if (!skb)
 				goto wait_for_memory;
 
10 changes: 5 additions & 5 deletions net/ipv4/tcp_output.c
@@ -1172,7 +1172,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, gfp);
+	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 
@@ -1731,7 +1731,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now, gfp);
 
-	buff = sk_stream_alloc_skb(sk, 0, gfp);
+	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
 
@@ -1950,7 +1950,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	}
 
 	/* We're allowed to probe. Build it now. */
-	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
@@ -3190,7 +3190,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	/* limit to order-0 allocations */
 	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
 
-	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
 	if (!syn_data)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
@@ -3256,7 +3256,7 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}
 
-	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
 	if (unlikely(!buff))
 		return -ENOBUFS;
 
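
Taken together, the hunks establish the following calling convention
for the new argument. The calls below are copied from the diff above;
the comments summarizing the rationale are editorial, inferred from
the changelog:

	/* Splitting an skb that is already charged to the socket, or
	 * sending the very first packet: blocking here could stall the
	 * socket for good, so force the accounting. */
	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);            /* tcp_fragment() */
	buff = sk_stream_alloc_skb(sk, 0, gfp, true);                /* tso_fragment() */
	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);  /* tcp_connect() */

	/* Optional transmissions (MTU probe, Fast Open payload) may
	 * safely fail under memory pressure: keep the normal check. */
	nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);        /* tcp_mtu_probe() */
	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);  /* tcp_send_syn_data() */

	/* Regular sends keep the 8e4d980 behavior: force only while
	 * the write queue is empty. */
	skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
				  skb_queue_empty(&sk->sk_write_queue));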
