From 93b174ad71b08e504c2cf6e8a58ecce778b77a40 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng
Date: Thu, 6 Dec 2012 08:45:32 +0000
Subject: [PATCH 1/2] tcp: bug fix Fast Open client retransmission

If SYN-ACK partially acks SYN-data, the client retransmits the
remaining data by tcp_retransmit_skb(). This increments loss recovery
state variables like tp->retrans_out in Open state. If loss recovery
happens before the retransmission is acked, it triggers the WARN_ON
check in tcp_fastretrans_alert(). For example: the client sends
SYN-data, gets a SYN-ACK acking only the ISN, retransmits the data,
sends another 4 data packets, and gets 3 dupacks.

Since the retransmission is not caused by a network drop, it should
not update the recovery state variables. Further, the server may
return a smaller MSS than the cached MSS used for the SYN-data, so
the retransmission needs a loop. Otherwise some data will not be
retransmitted until a timeout or another loss recovery event.

Signed-off-by: Yuchung Cheng
Acked-by: Neal Cardwell
Signed-off-by: David S. Miller
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_input.c  |  6 +++++-
 net/ipv4/tcp_output.c | 15 ++++++++++-----
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6feeccd83dd7..4af45e33105d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -525,6 +525,7 @@ static inline __u32 cookie_v6_init_sequence(struct sock *sk,
 extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 				      int nonagle);
 extern bool tcp_may_send_now(struct sock *sk);
+extern int __tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_retransmit_timer(struct sock *sk);
 extern void tcp_xmit_retransmit_queue(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 609ff98aeb47..181fc8234a52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5645,7 +5645,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);

 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_retransmit_skb(sk, data);
+		tcp_for_write_queue_from(data, sk) {
+			if (data == tcp_send_head(sk) ||
+			    __tcp_retransmit_skb(sk, data))
+				break;
+		}
 		tcp_rearm_rto(sk);
 		return true;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2798706cb063..948ac275b9b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2309,12 +2309,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
  * state updates are done by the caller. Returns non-zero if an
  * error occurred which prevented the send.
  */
-int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int cur_mss;
-	int err;

 	/* Inconslusive MTU probe */
 	if (icsk->icsk_mtup.probe_size) {
@@ -2387,11 +2386,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
 		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
 						   GFP_ATOMIC);
-		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			     -ENOBUFS;
+		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			      -ENOBUFS;
 	} else {
-		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
+}
+
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int err = __tcp_retransmit_skb(sk, skb);

 	if (err == 0) {
 		/* Update global TCP statistics. */

From c3c7c254b2e8cd99b0adf288c2a1bddacd7ba255 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 6 Dec 2012 13:54:59 +0000
Subject: [PATCH 2/2] net: gro: fix possible panic in skb_gro_receive()

commit 2e71a6f8084e (net: gro: selective flush of packets) added
a bug for skbs using frag_list. This part of the GRO stack is rarely
used, as it needs skbs that do not use a page fragment for their
skb->head.

Most drivers do use a page fragment, but some of them use GFP_KERNEL
allocations for the initial fill of their RX ring buffer.

napi_gro_flush() overwrites skb->prev, which these skbs used to point
to the last skb in the frag_list.

Fix this by using a separate field in struct napi_gro_cb to point to
the last fragment.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 2 ++
 net/core/skbuff.c         | 6 +++---
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda0276f03..a848ffc327f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1488,6 +1488,9 @@ struct napi_gro_cb {

 	/* Used in ipv6_gro_receive() */
 	int	proto;
+
+	/* used in skb_gro_receive() slow path */
+	struct sk_buff *last;
 };

 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
diff --git a/net/core/dev.c b/net/core/dev.c
index c0946cb2b354..e5942bf45a6d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3451,6 +3451,8 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;

+	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
+
 	if (NAPI_GRO_CB(skb)->count == 1) {
 		skb_shinfo(skb)->gso_size = 0;
 		goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c1437fda..3f0636cd76cd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3004,7 +3004,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
 	pinfo->gso_size = 0;
 	skb_header_release(p);
-	nskb->prev = p;
+	NAPI_GRO_CB(nskb)->last = p;

 	nskb->data_len += p->len;
 	nskb->truesize += p->truesize;
@@ -3030,8 +3030,8 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)

 	__skb_pull(skb, offset);

-	p->prev->next = skb;
-	p->prev = skb;
+	NAPI_GRO_CB(p)->last->next = skb;
+	NAPI_GRO_CB(p)->last = skb;
 	skb_header_release(skb);

 done:
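
A note on the pattern in patch 2: GRO chains aggregated segments through
skb->next of the head skb's frag_list and previously reused the head's
skb->prev to remember the tail of that chain, so each new segment could be
appended in O(1). Once napi_gro_flush() started writing skb->prev for its own
bookkeeping, the tail pointer had to move into GRO's private per-skb control
block (NAPI_GRO_CB). The standalone userspace C sketch below shows the same
"explicit tail pointer in private state" pattern; struct pkt, struct chain_cb,
chain_init, and chain_append are illustrative names invented here, not kernel
APIs.

/*
 * Sketch only: keep the tail of a singly linked chain in a private
 * control block instead of overloading an unrelated pointer, so appends
 * stay O(1) even when other code is free to reuse that pointer.
 */
#include <stdio.h>

struct pkt {
	int id;
	struct pkt *next;	/* frag_list-style forward chain */
};

struct chain_cb {		/* stands in for napi_gro_cb private state */
	struct pkt *last;	/* tail of the chain, for O(1) append */
};

static void chain_init(struct chain_cb *cb, struct pkt *head)
{
	head->next = NULL;
	cb->last = head;	/* a one-element chain is its own tail */
}

static void chain_append(struct chain_cb *cb, struct pkt *p)
{
	p->next = NULL;
	cb->last->next = p;	/* mirrors NAPI_GRO_CB(p)->last->next = skb */
	cb->last = p;		/* mirrors NAPI_GRO_CB(p)->last = skb      */
}

int main(void)
{
	struct pkt a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 };
	struct chain_cb cb;

	chain_init(&cb, &a);
	chain_append(&cb, &b);
	chain_append(&cb, &c);

	/* Walk the chain from the head; prints pkt 0, 1, 2 in order. */
	for (struct pkt *p = &a; p; p = p->next)
		printf("pkt %d\n", p->id);
	return 0;
}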