From 93b174ad71b08e504c2cf6e8a58ecce778b77a40 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng
Date: Thu, 6 Dec 2012 08:45:32 +0000
Subject: [PATCH 1/2] tcp: bug fix Fast Open client retransmission

If SYN-ACK partially acks SYN-data, the client retransmits the
remaining data by tcp_retransmit_skb(). This increments loss recovery
state variables like tp->retrans_out in Open state. If loss recovery
happens before the retransmission is acked, it triggers the WARN_ON
check in tcp_fastretrans_alert(). For example: the client sends
SYN-data, gets a SYN-ACK acking only the ISN, retransmits the data,
sends another 4 data packets, and gets 3 dupacks.

Since the retransmission is not caused by a network drop, it should
not update the recovery state variables. Further, the server may
return a smaller MSS than the cached MSS used for the SYN-data, so
the retransmission needs a loop. Otherwise some data will not be
retransmitted until a timeout or another loss recovery event.

Signed-off-by: Yuchung Cheng
Acked-by: Neal Cardwell
Signed-off-by: David S. Miller
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_input.c  |  6 +++++-
 net/ipv4/tcp_output.c | 15 ++++++++++-----
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6feeccd83dd7..4af45e33105d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -525,6 +525,7 @@ static inline __u32 cookie_v6_init_sequence(struct sock *sk,
 extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 				      int nonagle);
 extern bool tcp_may_send_now(struct sock *sk);
+extern int __tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_retransmit_timer(struct sock *sk);
 extern void tcp_xmit_retransmit_queue(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 609ff98aeb47..181fc8234a52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5645,7 +5645,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_retransmit_skb(sk, data);
+		tcp_for_write_queue_from(data, sk) {
+			if (data == tcp_send_head(sk) ||
+			    __tcp_retransmit_skb(sk, data))
+				break;
+		}
 		tcp_rearm_rto(sk);
 		return true;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2798706cb063..948ac275b9b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2309,12 +2309,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
  * state updates are done by the caller. Returns non-zero if an
  * error occurred which prevented the send.
  */
-int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int cur_mss;
-	int err;

 	/* Inconslusive MTU probe */
 	if (icsk->icsk_mtup.probe_size) {
@@ -2387,11 +2386,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
 		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
 						   GFP_ATOMIC);
-		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			     -ENOBUFS;
+		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			      -ENOBUFS;
 	} else {
-		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
+}
+
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int err = __tcp_retransmit_skb(sk, skb);

 	if (err == 0) {
 		/* Update global TCP statistics. */

From c3c7c254b2e8cd99b0adf288c2a1bddacd7ba255 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 6 Dec 2012 13:54:59 +0000
Subject: [PATCH 2/2] net: gro: fix possible panic in skb_gro_receive()

commit 2e71a6f8084e (net: gro: selective flush of packets) added
a bug for skbs using frag_list. This part of the GRO stack is rarely
used, as it needs skbs that do not use a page fragment for their
skb->head.

Most drivers do use a page fragment, but some of them use GFP_KERNEL
allocations for the initial fill of their RX ring buffer.

napi_gro_flush() overwrites skb->prev, which these skbs used to point
to the last skb in the frag_list.

Fix this by using a separate field in struct napi_gro_cb to point to
the last fragment.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 2 ++
 net/core/skbuff.c         | 6 +++---
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda0276f03..a848ffc327f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1488,6 +1488,9 @@ struct napi_gro_cb {

 	/* Used in ipv6_gro_receive() */
 	int	proto;
+
+	/* used in skb_gro_receive() slow path */
+	struct sk_buff *last;
 };

 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
diff --git a/net/core/dev.c b/net/core/dev.c
index c0946cb2b354..e5942bf45a6d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3451,6 +3451,8 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;

+	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
+
 	if (NAPI_GRO_CB(skb)->count == 1) {
 		skb_shinfo(skb)->gso_size = 0;
 		goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c1437fda..3f0636cd76cd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3004,7 +3004,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
 	pinfo->gso_size = 0;
 	skb_header_release(p);
-	nskb->prev = p;
+	NAPI_GRO_CB(nskb)->last = p;

 	nskb->data_len += p->len;
 	nskb->truesize += p->truesize;
@@ -3030,8 +3030,8 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)

 	__skb_pull(skb, offset);

-	p->prev->next = skb;
-	p->prev = skb;
+	NAPI_GRO_CB(p)->last->next = skb;
+	NAPI_GRO_CB(p)->last = skb;
 	skb_header_release(skb);

 done:
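
A note on the pattern in patch 2: GRO chains aggregated segments through
skb->next of the head skb's frag_list and previously reused the head's
skb->prev to remember the tail of that chain, so each new segment could be
appended in O(1). Once napi_gro_flush() started writing skb->prev for its own
bookkeeping, the tail pointer had to move into GRO's private per-skb control
block (NAPI_GRO_CB). The standalone userspace C sketch below shows the same
"explicit tail pointer in private state" pattern; struct pkt, struct chain_cb,
chain_init, and chain_append are illustrative names invented here, not kernel
APIs.

/*
 * Sketch only: keep the tail of a singly linked chain in a private
 * control block instead of overloading an unrelated pointer, so appends
 * stay O(1) even when other code is free to reuse that pointer.
 */
#include <stdio.h>

struct pkt {
	int id;
	struct pkt *next;	/* frag_list-style forward chain */
};

struct chain_cb {		/* stands in for napi_gro_cb private state */
	struct pkt *last;	/* tail of the chain, for O(1) append */
};

static void chain_init(struct chain_cb *cb, struct pkt *head)
{
	head->next = NULL;
	cb->last = head;	/* a one-element chain is its own tail */
}

static void chain_append(struct chain_cb *cb, struct pkt *p)
{
	p->next = NULL;
	cb->last->next = p;	/* mirrors NAPI_GRO_CB(p)->last->next = skb */
	cb->last = p;		/* mirrors NAPI_GRO_CB(p)->last = skb      */
}

int main(void)
{
	struct pkt a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 };
	struct chain_cb cb;

	chain_init(&cb, &a);
	chain_append(&cb, &b);
	chain_append(&cb, &c);

	/* Walk the chain from the head; prints pkt 0, 1, 2 in order. */
	for (struct pkt *p = &a; p; p = p->next)
		printf("pkt %d\n", p->id);
	return 0;
}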