From 4c3a76abd379d9a4668b2d417baa991de9757dc2 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sun, 22 Aug 2010 19:03:26 +0000
Subject: [PATCH 01/18] bridge: netfilter: fix a memory leak

nf_bridge_alloc() always reset the skb->nf_bridge, so we should always
put the old one.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2c911c0759c2..5ed00bd7009f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -162,8 +162,8 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 		if (tmp) {
 			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
 			atomic_set(&tmp->use, 1);
-			nf_bridge_put(nf_bridge);
 		}
+		nf_bridge_put(nf_bridge);
 		nf_bridge = tmp;
 	}
 	return nf_bridge;

From 7b589a35a1063b52f98c8b0f1b8f69afe91c3dba Mon Sep 17 00:00:00 2001
From: Yinglin Luan <synmyth@gmail.com>
Date: Sun, 22 Aug 2010 21:56:19 +0000
Subject: [PATCH 02/18] netxen: fix poll implementation

Function netxen_intr has pointer to nx_host_sds_ring
as second parameter not pointer to netxen_adapter.

Signed-off-by: Yinglin Luan <synmyth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netxen/netxen_nic_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index cb30df106a2c..73d314592230 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -2131,9 +2131,16 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void netxen_nic_poll_controller(struct net_device *netdev)
 {
+	int ring;
+	struct nx_host_sds_ring *sds_ring;
 	struct netxen_adapter *adapter = netdev_priv(netdev);
+	struct netxen_recv_context *recv_ctx = &adapter->recv_ctx;
+
 	disable_irq(adapter->irq);
-	netxen_intr(adapter->irq, adapter);
+	for (ring = 0; ring < adapter->max_sds_rings; ring++) {
+		sds_ring = &recv_ctx->sds_rings[ring];
+		netxen_intr(adapter->irq, sds_ring);
+	}
 	enable_irq(adapter->irq);
 }
 #endif

From bf82791ed667758a0f18a4b76be2d931d2c1b39d Mon Sep 17 00:00:00 2001
From: Yinglin Luan <synmyth@gmail.com>
Date: Sun, 22 Aug 2010 21:57:56 +0000
Subject: [PATCH 03/18] qlcnic: fix poll implementation

Function qlcnic_intr has pointer to qlcnic_host_sds_ring
as second parameter not pointer to qlcnic_adapter.

Signed-off-by: Yinglin Luan <synmyth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/qlcnic/qlcnic_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 213e3656d953..66eea5972020 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2188,9 +2188,16 @@ static int qlcnic_rx_poll(struct napi_struct *napi, int budget)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void qlcnic_poll_controller(struct net_device *netdev)
 {
+	int ring;
+	struct qlcnic_host_sds_ring *sds_ring;
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_recv_context *recv_ctx = &adapter->recv_ctx;
+
 	disable_irq(adapter->irq);
-	qlcnic_intr(adapter->irq, adapter);
+	for (ring = 0; ring < adapter->max_sds_rings; ring++) {
+		sds_ring = &recv_ctx->sds_rings[ring];
+		qlcnic_intr(adapter->irq, sds_ring);
+	}
 	enable_irq(adapter->irq);
 }
 #endif

From aa25ab7d943a5e1e6bcc2a65ff6669144f5b5d60 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 11 Aug 2010 05:12:57 +0000
Subject: [PATCH 04/18] 3c59x: Fix deadlock between boomerang_interrupt and
 boomerang_start_tx

If netconsole is in use, there is a possibility for deadlock in 3c59x between
boomerang_interrupt and boomerang_start_xmit.  Both routines take the vp->lock,
and if netconsole is in use, a pr_* call from the boomerang_interrupt routine
will result in the netconsole code attempting to trnasmit an skb, which can try
to take the same spin lock, resulting in deadlock.

The fix is pretty straightforward.  This patch allocats a bit in the 3c59x
private structure to indicate that its handling an interrupt.  If we get into
the transmit routine and that bit is set, we can be sure that we have recursed
and will deadlock if we continue, so instead we just return NETDEV_TX_BUSY, so
the stack requeues the skb to try again later.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/3c59x.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index c754d88e5ec9..c685a55fc2f4 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -633,7 +633,8 @@ struct vortex_private {
 		open:1,
 		medialock:1,
 		must_free_region:1,				/* Flag: if zero, Cardbus owns the I/O region */
-		large_frames:1;			/* accept large frames */
+		large_frames:1,			/* accept large frames */
+		handling_irq:1;			/* private in_irq indicator */
 	int drv_flags;
 	u16 status_enable;
 	u16 intr_enable;
@@ -2133,6 +2134,15 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			   dev->name, vp->cur_tx);
 	}
 
+	/*
+	 * We can't allow a recursion from our interrupt handler back into the
+	 * tx routine, as they take the same spin lock, and that causes
+	 * deadlock.  Just return NETDEV_TX_BUSY and let the stack try again in
+	 * a bit
+	 */
+	if (vp->handling_irq)
+		return NETDEV_TX_BUSY;
+
 	if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) {
 		if (vortex_debug > 0)
 			pr_warning("%s: BUG! Tx Ring full, refusing to send buffer.\n",
@@ -2335,11 +2345,13 @@ boomerang_interrupt(int irq, void *dev_id)
 
 	ioaddr = vp->ioaddr;
 
+
 	/*
 	 * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout
 	 * and boomerang_start_xmit
 	 */
 	spin_lock(&vp->lock);
+	vp->handling_irq = 1;
 
 	status = ioread16(ioaddr + EL3_STATUS);
 
@@ -2447,6 +2459,7 @@ boomerang_interrupt(int irq, void *dev_id)
 		pr_debug("%s: exiting interrupt, status %4.4x.\n",
 			   dev->name, status);
 handler_exit:
+	vp->handling_irq = 0;
 	spin_unlock(&vp->lock);
 	return IRQ_HANDLED;
 }

From 9dc002d8d9c2af837e789b5bb88c61a8b32c1be8 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 24 Aug 2010 12:21:13 -0700
Subject: [PATCH 05/18] caif-driver: add HAS_DMA dependency

Fix this error on an s390 allyesconfig build:

linux-2.6/drivers/net/caif/caif_spi.c:98:
    undefined reference to `dma_free_coherent'

Cc: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 631a6242b011..75bfc3a9d95f 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -15,7 +15,7 @@ config CAIF_TTY
 
 config CAIF_SPI_SLAVE
 	tristate "CAIF SPI transport driver for slave interface"
-	depends on CAIF
+	depends on CAIF && HAS_DMA
 	default n
 	---help---
 	The CAIF Link layer SPI Protocol driver for Slave SPI interface.

From ef24b16b5d67c815874ed2d0e2581db629661ba3 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@mvista.com>
Date: Tue, 24 Aug 2010 14:46:12 -0700
Subject: [PATCH 06/18] phylib: Fix race between returning phydev and calling
 adjust_link

It is possible that phylib will call adjust_link before returning
from {,of_}phy_connect(), which may cause the following [very rare,
though] oops upon reopening the device:

  Unable to handle kernel paging request for data at address 0x0000024c
  Oops: Kernel access of bad area, sig: 11 [#1]
  PREEMPT SMP NR_CPUS=2 LTT NESTING LEVEL : 0
  P1021 RDB
  Modules linked in:
  NIP: c0345dac LR: c0345dac CTR: c0345d84
  TASK = dffab6b0[30] 'events/0' THREAD: c0d24000 CPU: 0
  [...]
  NIP [c0345dac] adjust_link+0x28/0x19c
  LR [c0345dac] adjust_link+0x28/0x19c
  Call Trace:
  [c0d25f00] [000045e1] 0x45e1 (unreliable)
  [c0d25f30] [c036c158] phy_state_machine+0x3ac/0x554
  [...]

Here is why. Drivers store phydev in their private structures, e.g.
gianfar driver:

static int init_phy(struct net_device *dev)
{
	...
	priv->phydev = of_phy_connect(...);
	...
}

So that adjust_link could retrieve it back:

static void adjust_link(struct net_device *dev)
{
	...
	struct phy_device *phydev = priv->phydev;
	...
}

If the device has been opened before, then phydev->state is set to
PHY_HALTED (or undefined if the driver didn't call phy_stop()).

Now, phy_connect starts the PHY state machine before returning phydev to
the driver:

	phy_start_machine(phydev, NULL);

	if (phydev->irq > 0)
		phy_start_interrupts(phydev);

	return phydev;

The time between 'phy_start_machine()' and 'return phydev' is undefined.
The start machine routine delays execution for 1 second, which is enough
for most cases. But under heavy load, or if you're unlucky, it is quite
possible that PHY state machine will execute before phy_connect()
returns, and so adjust_link callback will try to dereference phydev,
which is not yet ready.

To fix the issue, simply initialize the PHY's state to PHY_READY during
phy_attach(). This will ensure that phylib won't call adjust_link before
phy_start().

Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c0761197c07e..16ddc77313cb 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -466,6 +466,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 
 	phydev->interface = interface;
 
+	phydev->state = PHY_READY;
+
 	/* Do initial configuration here, now that
 	 * we have certain key parameters
 	 * (dev_flags and interface) */

From 4169591fd7c260b2b0b4e8f4d51f63f5b15ad78a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 24 Aug 2010 06:52:46 +0000
Subject: [PATCH 07/18] pxa168_eth: remove unneeded null check

"pep->pd" isn't checked consistently in this function.  For example it's
dereferenced unconditionally on the next line after the end of the if
condition.  This function is only called from pxa168_eth_probe() and
pep->pd is always non-NULL so I removed the check.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pxa168_eth.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
index ecc64d750cce..857a68115a5c 100644
--- a/drivers/net/pxa168_eth.c
+++ b/drivers/net/pxa168_eth.c
@@ -1414,10 +1414,8 @@ static int ethernet_phy_setup(struct net_device *dev)
 {
 	struct pxa168_eth_private *pep = netdev_priv(dev);
 
-	if (pep->pd != NULL) {
-		if (pep->pd->init)
-			pep->pd->init();
-	}
+	if (pep->pd->init)
+		pep->pd->init();
 	pep->phy = phy_scan(pep, pep->pd->phy_addr & 0x1f);
 	if (pep->phy != NULL)
 		phy_init(pep, pep->pd->speed, pep->pd->duplex);

From 945c7c73e2e81d68e3e2970afd95254e8f153fc9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 24 Aug 2010 06:53:33 +0000
Subject: [PATCH 08/18] pxa168_eth: fix error handling in prope

A couple issues here:
* Some resources weren't released.
* If alloc_etherdev() failed it would have caused a NULL dereference
  because "pep" would be null when we checked "if (pep->clk)".
* Also it's better to propagate the error codes from mdiobus_register()
  instead of just returning -ENOMEM.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pxa168_eth.c | 44 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
index 857a68115a5c..302fb480374b 100644
--- a/drivers/net/pxa168_eth.c
+++ b/drivers/net/pxa168_eth.c
@@ -1497,7 +1497,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	dev = alloc_etherdev(sizeof(struct pxa168_eth_private));
 	if (!dev) {
 		err = -ENOMEM;
-		goto out;
+		goto err_clk;
 	}
 
 	platform_set_drvdata(pdev, dev);
@@ -1507,12 +1507,12 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		err = -ENODEV;
-		goto out;
+		goto err_netdev;
 	}
 	pep->base = ioremap(res->start, res->end - res->start + 1);
 	if (pep->base == NULL) {
 		err = -ENOMEM;
-		goto out;
+		goto err_netdev;
 	}
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	BUG_ON(!res);
@@ -1549,7 +1549,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	pep->smi_bus = mdiobus_alloc();
 	if (pep->smi_bus == NULL) {
 		err = -ENOMEM;
-		goto out;
+		goto err_base;
 	}
 	pep->smi_bus->priv = pep;
 	pep->smi_bus->name = "pxa168_eth smi";
@@ -1558,31 +1558,31 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id);
 	pep->smi_bus->parent = &pdev->dev;
 	pep->smi_bus->phy_mask = 0xffffffff;
-	if (mdiobus_register(pep->smi_bus) < 0) {
-		err = -ENOMEM;
-		goto out;
-	}
+	err = mdiobus_register(pep->smi_bus);
+	if (err)
+		goto err_free_mdio;
+
 	pxa168_init_hw(pep);
 	err = ethernet_phy_setup(dev);
 	if (err)
-		goto out;
+		goto err_mdiobus;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	err = register_netdev(dev);
 	if (err)
-		goto out;
+		goto err_mdiobus;
 	return 0;
-out:
-	if (pep->clk) {
-		clk_disable(pep->clk);
-		clk_put(pep->clk);
-		pep->clk = NULL;
-	}
-	if (pep->base) {
-		iounmap(pep->base);
-		pep->base = NULL;
-	}
-	if (dev)
-		free_netdev(dev);
+
+err_mdiobus:
+	mdiobus_unregister(pep->smi_bus);
+err_free_mdio:
+	mdiobus_free(pep->smi_bus);
+err_base:
+	iounmap(pep->base);
+err_netdev:
+	free_netdev(dev);
+err_clk:
+	clk_disable(clk);
+	clk_put(clk);
 	return err;
 }
 

From 4f2c85106883bada5167e1d42a0409e063da8895 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 24 Aug 2010 06:54:20 +0000
Subject: [PATCH 09/18] pxa168_eth: update call to phy_mii_ioctl()

The phy_mii_ioctl() function changed recently.  It now takes a struct
ifreq pointer directly.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pxa168_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
index 302fb480374b..f324b767f685 100644
--- a/drivers/net/pxa168_eth.c
+++ b/drivers/net/pxa168_eth.c
@@ -1350,7 +1350,7 @@ static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr,
 {
 	struct pxa168_eth_private *pep = netdev_priv(dev);
 	if (pep->phy != NULL)
-		return phy_mii_ioctl(pep->phy, if_mii(ifr), cmd);
+		return phy_mii_ioctl(pep->phy, ifr, cmd);
 
 	return -EOPNOTSUPP;
 }

From b2bc85631e72485b984bcd202a104591874babba Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 24 Aug 2010 06:55:05 +0000
Subject: [PATCH 10/18] pxa168_eth: silence gcc warnings

Casting "pep->tx_desc_dma" to to a struct tx_desc pointer makes gcc
complain:

drivers/net/pxa168_eth.c:657: warning:
	cast to pointer from integer of different size

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pxa168_eth.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
index f324b767f685..410ea0a61371 100644
--- a/drivers/net/pxa168_eth.c
+++ b/drivers/net/pxa168_eth.c
@@ -654,15 +654,15 @@ static void eth_port_start(struct net_device *dev)
 	/* Assignment of Tx CTRP of given queue */
 	tx_curr_desc = pep->tx_curr_desc_q;
 	wrl(pep, ETH_C_TX_DESC_1,
-	    (u32) ((struct tx_desc *)pep->tx_desc_dma + tx_curr_desc));
+	    (u32) (pep->tx_desc_dma + tx_curr_desc * sizeof(struct tx_desc)));
 
 	/* Assignment of Rx CRDP of given queue */
 	rx_curr_desc = pep->rx_curr_desc_q;
 	wrl(pep, ETH_C_RX_DESC_0,
-	    (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc));
+	    (u32) (pep->rx_desc_dma + rx_curr_desc * sizeof(struct rx_desc)));
 
 	wrl(pep, ETH_F_RX_DESC_0,
-	    (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc));
+	    (u32) (pep->rx_desc_dma + rx_curr_desc * sizeof(struct rx_desc)));
 
 	/* Clear all interrupts */
 	wrl(pep, INT_CAUSE, 0);

From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 25 Aug 2010 02:27:49 -0700
Subject: [PATCH 11/18] tcp: Combat per-cpu skew in orphan tests.

As reported by Anton Blanchard when we use
percpu_counter_read_positive() to make our orphan socket limit checks,
the check can be off by up to num_cpus_online() * batch (which is 32
by default) which on a 128 cpu machine can be as large as the default
orphan limit itself.

Fix this by doing the full expensive sum check if the optimized check
triggers.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/tcp.h    | 18 ++++++++++++++----
 net/ipv4/tcp.c       |  5 +----
 net/ipv4/tcp_timer.c |  8 ++++----
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index df6a2eb20193..eaa9582779d0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
 	return seq3 - seq2 >= seq1 - seq2;
 }
 
-static inline int tcp_too_many_orphans(struct sock *sk, int num)
+static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
 {
-	return (num > sysctl_tcp_max_orphans) ||
-		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
+	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
+	int orphans = percpu_counter_read_positive(ocp);
+
+	if (orphans << shift > sysctl_tcp_max_orphans) {
+		orphans = percpu_counter_sum_positive(ocp);
+		if (orphans << shift > sysctl_tcp_max_orphans)
+			return true;
+	}
+
+	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+	    atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
+		return true;
+	return false;
 }
 
 /* syncookies: remember time of last synqueue overflow */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 176e11aaea77..197b9b77fa3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2011,11 +2011,8 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		int orphan_count = percpu_counter_read_positive(
-						sk->sk_prot->orphan_count);
-
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk, orphan_count)) {
+		if (tcp_too_many_orphans(sk, 0)) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 808bb920c9f5..c35b469e851c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk)
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = percpu_counter_read_positive(&tcp_orphan_count);
+	int shift = 0;
 
 	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
-		orphans <<= 1;
+		shift++;
 
 	/* If some dubious ICMP arrived, penalize even more. */
 	if (sk->sk_err_soft)
-		orphans <<= 1;
+		shift++;
 
-	if (tcp_too_many_orphans(sk, orphans)) {
+	if (tcp_too_many_orphans(sk, shift)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
 

From c5ed63d66f24fd4f7089b5a6e087b0ce7202aa8e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 25 Aug 2010 23:02:17 -0700
Subject: [PATCH 12/18] tcp: fix three tcp sysctls tuning

As discovered by Anton Blanchard, current code to autotune
tcp_death_row.sysctl_max_tw_buckets, sysctl_tcp_max_orphans and
sysctl_max_syn_backlog makes little sense.

The bigger a page is, the less tcp_max_orphans is : 4096 on a 512GB
machine in Anton's case.

(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))
is much bigger if spinlock debugging is on. Its wrong to select bigger
limits in this case (where kernel structures are also bigger)

bhash_size max is 65536, and we get this value even for small machines.

A better ground is to use size of ehash table, this also makes code
shorter and more obvious.

Based on a patch from Anton, and another from David.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 197b9b77fa3e..e2add5ff9cb1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3209,7 +3209,7 @@ void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
 	unsigned long nr_pages, limit;
-	int order, i, max_share;
+	int i, max_share, cnt;
 	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3258,22 +3258,12 @@ void __init tcp_init(void)
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
 	}
 
-	/* Try to be a bit smarter and adjust defaults depending
-	 * on available memory.
-	 */
-	for (order = 0; ((1 << order) << PAGE_SHIFT) <
-			(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
-			order++)
-		;
-	if (order >= 4) {
-		tcp_death_row.sysctl_max_tw_buckets = 180000;
-		sysctl_tcp_max_orphans = 4096 << (order - 4);
-		sysctl_max_syn_backlog = 1024;
-	} else if (order < 3) {
-		tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
-		sysctl_tcp_max_orphans >>= (3 - order);
-		sysctl_max_syn_backlog = 128;
-	}
+
+	cnt = tcp_hashinfo.ehash_mask + 1;
+
+	tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
+	sysctl_tcp_max_orphans = cnt / 2;
+	sysctl_max_syn_backlog = max(128, cnt / 256);
 
 	/* Set the pressure threshold to be a fraction of global memory that
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of

From d84ba638e4ba3c40023ff997aa5e8d3ed002af36 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 24 Aug 2010 16:05:48 +0000
Subject: [PATCH 13/18] tcp: select(writefds) don't hang up when a peer close
 connection

This issue come from ruby language community. Below test program
hang up when only run on Linux.

	% uname -mrsv
	Linux 2.6.26-2-486 #1 Sat Dec 26 08:37:39 UTC 2009 i686
	% ruby -rsocket -ve '
	BasicSocket.do_not_reverse_lookup = true
	serv = TCPServer.open("127.0.0.1", 0)
	s1 = TCPSocket.open("127.0.0.1", serv.addr[1])
	s2 = serv.accept
	s2.close
	s1.write("a") rescue p $!
	s1.write("a") rescue p $!
	Thread.new {
	  s1.write("a")
	}.join'
	ruby 1.9.3dev (2010-07-06 trunk 28554) [i686-linux]
	#<Errno::EPIPE: Broken pipe>
	[Hang Here]

FreeBSD, Solaris, Mac doesn't. because Ruby's write() method call
select() internally. and tcp_poll has a bug.

SUS defined 'ready for writing' of select() as following.

|  A descriptor shall be considered ready for writing when a call to an output
|  function with O_NONBLOCK clear would not block, whether or not the function
|  would transfer data successfully.

That said, EPIPE situation is clearly one of 'ready for writing'.

We don't have read-side issue because tcp_poll() already has read side
shutdown care.

|        if (sk->sk_shutdown & RCV_SHUTDOWN)
|                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

So, Let's insert same logic in write side.

- reference url
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31065
  http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/31068

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2add5ff9cb1..3fb1428e526e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,7 +451,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 					mask |= POLLOUT | POLLWRNORM;
 			}
-		}
+		} else
+			mask |= POLLOUT | POLLWRNORM;
 
 		if (tp->urg_data & TCP_URG_VALID)
 			mask |= POLLPRI;

From bfc960a8eec023a170a80697fe65157cd4f44f81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 25 Aug 2010 23:44:35 +0000
Subject: [PATCH 14/18] l2tp: test for ethernet header in l2tp_eth_dev_recv()

close https://bugzilla.kernel.org/show_bug.cgi?id=16529

Before calling dev_forward_skb(), we should make sure skb head contains
at least an ethernet header, even if length included in upper layer said
so. Use pskb_may_pull() to make sure this ethernet header is present in
skb head.

Reported-by: Thomas Heil <heil@terminal-consulting.de>
Reported-by: Ian Campbell <Ian.Campbell@eu.citrix.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 58c6c4cda73b..1ae697681bc7 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
 		printk("\n");
 	}
 
-	if (data_len < ETH_HLEN)
+	if (!pskb_may_pull(skb, sizeof(ETH_HLEN)))
 		goto error;
 
 	secpath_reset(skb);

From fe5f098055ed7f701bfb16f8f87378cf67de9a0e Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Thu, 26 Aug 2010 08:27:58 +0000
Subject: [PATCH 15/18] qlge: reset the chip before freeing the buffers

Qlge is freeing the buffers before stopping the card DMA, and
this can cause some severe error, as a EEH event on PPC.

This patch just stop the card and then free the resources.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/qlge/qlge_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 8d63f69b27d9..5f89e83501f4 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3919,12 +3919,12 @@ static int ql_adapter_down(struct ql_adapter *qdev)
 	for (i = 0; i < qdev->rss_ring_count; i++)
 		netif_napi_del(&qdev->rx_ring[i].napi);
 
-	ql_free_rx_buffers(qdev);
-
 	status = ql_adapter_reset(qdev);
 	if (status)
 		netif_err(qdev, ifdown, qdev->ndev, "reset(func #%d) FAILED!\n",
 			  qdev->func);
+	ql_free_rx_buffers(qdev);
+
 	return status;
 }
 

From d71b0e9c0028f3af910226f995e0074873e16979 Mon Sep 17 00:00:00 2001
From: Bernard Pidoux F6BVP <f6bvp@free.fr>
Date: Thu, 26 Aug 2010 11:40:00 +0000
Subject: [PATCH 16/18] ax25: missplaced sock_put(sk)

This patch moves a missplaced sock_put(sk) after
bh_unlock_sock(sk)
like in other parts of AX25 driver.

Signed-off-by: Bernard Pidoux <f6bvp@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_ds_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 2ce79df00680..c7d81436213d 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -112,8 +112,8 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
-				sock_put(sk);
 				bh_unlock_sock(sk);
+				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
 			return;

From 7e368739e3b3f1d7944794c178a15f05829b56bc Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 26 Aug 2010 16:11:08 -0700
Subject: [PATCH 17/18] net/caif/cfrfml.c: use asm/unaligned.h

caif does not build on ia64 starting with 2.6.32-rc1.  Using
asm/unaligned.h instead of linux/unaligned/le_byteshift.h fixes the issue.

include/linux/unaligned/le_byteshift.h:40:50: error: redefinition of 'get_unaligned_le16'
include/linux/unaligned/le_byteshift.h:45:50: error: redefinition of 'get_unaligned_le32'
include/linux/unaligned/le_byteshift.h:50:50: error: redefinition of 'get_unaligned_le64'
include/linux/unaligned/le_byteshift.h:55:51: error: redefinition of 'put_unaligned_le16'
include/linux/unaligned/le_byteshift.h:60:51: error: redefinition of 'put_unaligned_le32'
include/linux/unaligned/le_byteshift.h:65:51: error: redefinition of 'put_unaligned_le64'
include/linux/unaligned/le_struct.h:31:51: note: previous definition of 'put_unaligned_le64' was here

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfrfml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index eb1602022ac0..9a699242d104 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,7 +7,7 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-#include <linux/unaligned/le_byteshift.h>
+#include <asm/unaligned.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>

From c34186ed008229e7f7e3f1de8e6acf6374995358 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 27 Aug 2010 19:31:56 -0700
Subject: [PATCH 18/18] net/ipv4: Eliminate kstrdup memory leak

The string clone is only used as a temporary copy of the argument val
within the while loop, and so it should be freed before leaving the
function.  The call to strsep, however, modifies clone, so a pointer to the
front of the string is kept in saved_clone, to make it possible to free it.

The sematic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
local idexpression x;
expression E;
identifier l;
statement S;
@@

*x= \(kasprintf\|kstrdup\)(...);
...
if (x == NULL) S
... when != kfree(x)
    when != E = x
if (...) {
  <... when != kfree(x)
* goto l;
  ...>
* return ...;
}
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 0ec9bd0ae94f..850c737e08e2 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -196,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
 int tcp_set_allowed_congestion_control(char *val)
 {
 	struct tcp_congestion_ops *ca;
-	char *clone, *name;
+	char *saved_clone, *clone, *name;
 	int ret = 0;
 
-	clone = kstrdup(val, GFP_USER);
+	saved_clone = clone = kstrdup(val, GFP_USER);
 	if (!clone)
 		return -ENOMEM;
 
@@ -226,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val)
 	}
 out:
 	spin_unlock(&tcp_cong_list_lock);
+	kfree(saved_clone);
 
 	return ret;
 }