From f557147c4615f5e03abb673bd566901783565666 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Sat, 18 Aug 2012 04:06:11 +0000 Subject: [PATCH 1/7] igb: Change how we check for pre-existing and assigned VFs Adapt the pre-existing and assigned VFs code to the ixgbe way introduced in commit 9297127b9cdd8d30c829ef5fd28b7cc0323a7bcd. Instead of searching the enabled VFs we use pci_num_vf to determine enabled VFs. By comparing to which PF an assigned VF is owned it's possible to decide whether to leave it enabled or not. Signed-off-by: Stefan Assmann Acked-by: Greg Rose Tested-by: Robert Garrett Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 1 - drivers/net/ethernet/intel/igb/igb_main.c | 104 +++++----------------- 2 files changed, 22 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 43c8e2914263..6f17f698fafe 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -101,7 +101,6 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; - struct pci_dev *vfdev; }; #define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 246646b61a1a..073009671e7a 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -172,8 +172,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_find_enabled_vfs(struct igb_adapter *adapter); -static int igb_check_vf_assignment(struct igb_adapter *adapter); +static bool igb_vfs_are_assigned(struct igb_adapter *adapter); #endif #ifdef CONFIG_PM @@ -2300,11 +2299,11 @@ static void __devexit igb_remove(struct pci_dev *pdev) /* reclaim resources allocated to VFs */ if (adapter->vf_data) { /* disable iov and allow time for transactions to clear */ - if (!igb_check_vf_assignment(adapter)) { + if (igb_vfs_are_assigned(adapter)) { + dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n"); + } else { pci_disable_sriov(pdev); msleep(500); - } else { - dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); } kfree(adapter->vf_data); @@ -2344,7 +2343,7 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter) #ifdef CONFIG_PCI_IOV struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - int old_vfs = igb_find_enabled_vfs(adapter); + int old_vfs = pci_num_vf(adapter->pdev); int i; /* Virtualization features not supported on i210 family. */ @@ -5037,102 +5036,43 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, static int igb_vf_configure(struct igb_adapter *adapter, int vf) { unsigned char mac_addr[ETH_ALEN]; - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pvfdev; - unsigned int device_id; - u16 thisvf_devfn; eth_random_addr(mac_addr); igb_set_vf_mac(adapter, vf, mac_addr); - switch (adapter->hw.mac.type) { - case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) | - (pdev->devfn & 1); - break; - case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) | - (pdev->devfn & 3); - break; - default: - device_id = 0; - thisvf_devfn = 0; - break; - } - - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == thisvf_devfn) - break; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - if (pvfdev) - adapter->vf_data[vf].vfdev = pvfdev; - else - dev_err(&pdev->dev, - "Couldn't find pci dev ptr for VF %4.4x\n", - thisvf_devfn); - return pvfdev != NULL; + return 0; } -static int igb_find_enabled_vfs(struct igb_adapter *adapter) +static bool igb_vfs_are_assigned(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *pvfdev; - u16 vf_devfn = 0; - u16 vf_stride; - unsigned int device_id; - int vfs_found = 0; + struct pci_dev *vfdev; + int dev_id; switch (adapter->hw.mac.type) { case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - vf_stride = 2; + dev_id = IGB_82576_VF_DEV_ID; break; case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - vf_stride = 4; + dev_id = IGB_I350_VF_DEV_ID; break; default: - device_id = 0; - vf_stride = 0; - break; + return false; } - vf_devfn = pdev->devfn + 0x80; - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == vf_devfn && - (pvfdev->bus->number >= pdev->bus->number)) - vfs_found++; - vf_devfn += vf_stride; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - return vfs_found; -} - -static int igb_check_vf_assignment(struct igb_adapter *adapter) -{ - int i; - for (i = 0; i < adapter->vfs_allocated_count; i++) { - if (adapter->vf_data[i].vfdev) { - if (adapter->vf_data[i].vfdev->dev_flags & - PCI_DEV_FLAGS_ASSIGNED) + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL); + while (vfdev) { + /* if we don't own it we don't care */ + if (vfdev->is_virtfn && vfdev->physfn == pdev) { + /* if it is assigned we cannot release it */ + if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) return true; } + + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev); } + return false; } From 3dbdf96928dcdece134113c8ffa137f1a3b5dd88 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 12 Sep 2012 04:36:24 +0000 Subject: [PATCH 2/7] igb: Fix stats output on i210/i211 parts. Due to a hardware issue, on i210 and i211 parts, the TNCRS statistic provides an invalid value. This patch changes the update stats function to increment the stat only for non-i210/i211 parts. Signed-off-by: Carolyn Wyborny Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 073009671e7a..60cf3ebf4bb7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4776,7 +4776,11 @@ void igb_update_stats(struct igb_adapter *adapter, reg = rd32(E1000_CTRL_EXT); if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { adapter->stats.rxerrc += rd32(E1000_RXERRC); - adapter->stats.tncrs += rd32(E1000_TNCRS); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += rd32(E1000_TNCRS); } adapter->stats.tsctc += rd32(E1000_TSCTC); From f33005a637bffca72fe373d1e91f2dfbc8fae157 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 13 Sep 2012 06:27:55 +0000 Subject: [PATCH 3/7] igb: Remove logic that was doing NUMA pseudo-aware allocations This change removes the code that was doing the NUMA allocations for the q_vectors, rings, and ring resources. The problem is the logic used assumed that the NUMA nodes were always interleved and that is not always the case. At some point I hope to add this functionality back in a more controlled manner in the future. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 3 - drivers/net/ethernet/intel/igb/igb_main.c | 95 ++++------------------- 2 files changed, 13 insertions(+), 85 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 6f17f698fafe..9cad05894193 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -213,7 +213,6 @@ struct igb_q_vector { struct igb_ring_container rx, tx; struct napi_struct napi; - int numa_node; u16 itr_val; u8 set_itr; @@ -258,7 +257,6 @@ struct igb_ring { }; /* Items past this point are only used during ring alloc / free */ dma_addr_t dma; /* phys address of the ring */ - int numa_node; /* node to alloc ring memory on */ }; enum e1000_ring_flags_t { @@ -373,7 +371,6 @@ struct igb_adapter { int vf_rate_link_speed; u32 rss_queues; u32 wvbr; - int node; u32 *shadow_vfta; #ifdef CONFIG_IGB_PTP diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 60cf3ebf4bb7..c9997d83de07 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -682,52 +682,29 @@ static int igb_alloc_queues(struct igb_adapter *adapter) { struct igb_ring *ring; int i; - int orig_node = adapter->node; for (i = 0; i < adapter->num_tx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->tx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* For 82575, context index must be unique per ring. */ if (adapter->hw.mac.type == e1000_82575) set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); adapter->tx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; for (i = 0; i < adapter->num_rx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->rx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* set flag indicating ring supports SCTP checksum offload */ if (adapter->hw.mac.type >= e1000_82576) set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); @@ -741,16 +718,12 @@ static int igb_alloc_queues(struct igb_adapter *adapter) adapter->rx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_cache_ring_register(adapter); return 0; err: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_queues(adapter); return -ENOMEM; @@ -1116,24 +1089,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) struct igb_q_vector *q_vector; struct e1000_hw *hw = &adapter->hw; int v_idx; - int orig_node = adapter->node; for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - if ((adapter->num_q_vectors == (adapter->num_rx_queues + - adapter->num_tx_queues)) && - (adapter->num_rx_queues == v_idx)) - adapter->node = orig_node; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, - adapter->node); - if (!q_vector) - q_vector = kzalloc(sizeof(struct igb_q_vector), - GFP_KERNEL); + q_vector = kzalloc(sizeof(struct igb_q_vector), + GFP_KERNEL); if (!q_vector) goto err_out; q_vector->adapter = adapter; @@ -1142,14 +1101,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); adapter->q_vector[v_idx] = q_vector; } - /* Restore the adapter's original node */ - adapter->node = orig_node; return 0; err_out: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_q_vectors(adapter); return -ENOMEM; } @@ -2423,8 +2378,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - adapter->node = -1; - spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -2671,13 +2624,11 @@ static int igb_close(struct net_device *netdev) int igb_setup_tx_resources(struct igb_ring *tx_ring) { struct device *dev = tx_ring->dev; - int orig_node = dev_to_node(dev); int size; size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); - if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + + tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2685,18 +2636,10 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - set_dev_node(dev, tx_ring->numa_node); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!tx_ring->desc) - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); - if (!tx_ring->desc) goto err; @@ -2707,8 +2650,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) err: vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); return -ENOMEM; } @@ -2825,34 +2768,23 @@ static void igb_configure_tx(struct igb_adapter *adapter) int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct device *dev = rx_ring->dev; - int orig_node = dev_to_node(dev); - int size, desc_len; + int size; size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + + rx_ring->rx_buffer_info = vzalloc(size); if (!rx_ring->rx_buffer_info) goto err; - desc_len = sizeof(union e1000_adv_rx_desc); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - set_dev_node(dev, rx_ring->numa_node); rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!rx_ring->desc) - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - if (!rx_ring->desc) goto err; @@ -2864,8 +2796,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) err: vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the receive descriptor" - " ring\n"); + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); return -ENOMEM; } From f4128785b8f3f1fd7dc81b972661003d639a4676 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 13 Sep 2012 06:28:01 +0000 Subject: [PATCH 4/7] igb: Change Tx cleanup loop to do/while instead of for This change makes it so that Tx cleanup is done in a do/while loop instead of a for loop. The main motivation behind this is the fact that we should never be invoked with a budget less than 1 so we can skip checking the budget before processing the first descriptor. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 28 +++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c9997d83de07..91f542c50f61 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5690,7 +5690,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx.ring; struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; unsigned int i = tx_ring->next_to_clean; @@ -5702,16 +5702,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_desc = IGB_TX_DESC(tx_ring, i); i -= tx_ring->count; - for (; budget; budget--) { - eop_desc = tx_buffer->next_to_watch; - - /* prevent any other reads prior to eop_desc */ - rmb(); + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + rmb(); + /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) break; @@ -5767,7 +5767,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } - } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -5783,12 +5789,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct e1000_hw *hw = &adapter->hw; - eop_desc = tx_buffer->next_to_watch; - /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - if (eop_desc && + if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5812,9 +5816,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_ring->next_to_use, tx_ring->next_to_clean, tx_buffer->time_stamp, - eop_desc, + tx_buffer->next_to_watch, jiffies, - eop_desc->wb.status); + tx_buffer->next_to_watch->wb.status); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); From 797fd4be7b6968ea752ae47367ae95454124a698 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 13 Sep 2012 06:28:11 +0000 Subject: [PATCH 5/7] igb: Change how we populate the RSS indirection table This patch cleans up our RSS indirection table configuration so that we generate the same table regardless of CPU endianness. In addition it changes the table setup so that instead of doing a modulo based setup it is instead a divisor based setup. The advantage to this is that we should be able to take the Rx hash and compute the Rx queue with very little CPU overhead if needed. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 55 +++++++++++------------ 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 91f542c50f61..27688d9d2b64 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2834,11 +2834,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0, shift2 = 0; - union e1000_reta { - u32 dword; - u8 bytes[4]; - } reta; + u32 j, num_rx_queues, shift = 0; static const u8 rsshash[40] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, @@ -2856,35 +2852,36 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) num_rx_queues = adapter->rss_queues; - if (adapter->vfs_allocated_count) { - /* 82575 and 82576 supports 2 RSS queues for VMDq */ - switch (hw->mac.type) { - case e1000_i350: - case e1000_82580: - num_rx_queues = 1; - shift = 0; - break; - case e1000_82576: + switch (hw->mac.type) { + case e1000_82575: + shift = 6; + break; + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count) { shift = 3; num_rx_queues = 2; - break; - case e1000_82575: - shift = 2; - shift2 = 6; - default: - break; } - } else { - if (hw->mac.type == e1000_82575) - shift = 6; + break; + default: + break; } - for (j = 0; j < (32 * 4); j++) { - reta.bytes[j & 3] = (j % num_rx_queues) << shift; - if (shift2) - reta.bytes[j & 3] |= num_rx_queues << shift2; - if ((j & 3) == 3) - wr32(E1000_RETA(j >> 2), reta.dword); + /* + * Populate the indirection table 4 entries at a time. To do this + * we are generating the results for n and n+2 and then interleaving + * those with the results with n+1 and n+3. + */ + for (j = 0; j < 32; j++) { + /* first pass generates n and n+2 */ + u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; + u32 reta = (base & 0x07800780) >> (7 - shift); + + /* second pass generates n+1 and n+3 */ + base += 0x00010001 * num_rx_queues; + reta |= (base & 0x07800780) << (1 + shift); + + wr32(E1000_RETA(j), reta); } /* From a57fe23e240b95282e60d643cd8ada3d2a66d8c6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 13 Sep 2012 06:28:16 +0000 Subject: [PATCH 6/7] igb: Simplify how we populate the RSS key Instead of storing the RSS key as a character array we can simplify the configuration by making it a u32 array. This allows us to just write one value per register without any unnecessary operations to construct the value. This change will produce the same exact key, the only difference is that I translated the u8 array to a u32 array which will be correctly ordered on writes to hardware by the cpu_to_le32 operations that are built into the writel calls. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 27688d9d2b64..db6e456688a1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2835,20 +2835,14 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; u32 j, num_rx_queues, shift = 0; - static const u8 rsshash[40] = { - 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, - 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, - 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, - 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; /* Fill out hash function seeds */ - for (j = 0; j < 10; j++) { - u32 rsskey = rsshash[(j * 4)]; - rsskey |= rsshash[(j * 4) + 1] << 8; - rsskey |= rsshash[(j * 4) + 2] << 16; - rsskey |= rsshash[(j * 4) + 3] << 24; - array_wr32(E1000_RSSRK(0), j, rsskey); - } + for (j = 0; j < 10; j++) + wr32(E1000_RSSRK(j), rsskey[j]); num_rx_queues = adapter->rss_queues; From c9f14bf3a49f86e6402a6e3476a180f2bdc8a71b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 18 Sep 2012 01:56:27 +0000 Subject: [PATCH 7/7] igb: Use dma_unmap_addr and dma_unmap_len defines This change is meant to improve performance on systems that do not require the DMA unmap calls. On those systems we do not need to make use of the unmap address for Tx or the unmap length so we can drop both thereby reducing the size of the Tx buffer info structure. In addition I have changed the logic to check for unmap length instead of unmap address when checking to see if a buffer needs to be unmapped from DMA use. The reasons for this change is that on some platforms it is possible to receive a valid DMA address of 0 from an IOMMU. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 4 +- drivers/net/ethernet/intel/igb/igb_main.c | 64 +++++++++++------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 9cad05894193..8aad230c0592 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -168,8 +168,8 @@ struct igb_tx_buffer { unsigned int bytecount; u16 gso_segs; __be16 protocol; - dma_addr_t dma; - u32 length; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index db6e456688a1..60bf46534835 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -403,8 +403,8 @@ static void igb_dump(struct igb_adapter *adapter) buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp); } @@ -455,8 +455,8 @@ static void igb_dump(struct igb_adapter *adapter) " %04X %p %016llX %p%s\n", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp, buffer_info->skb, next_desc); @@ -465,7 +465,8 @@ static void igb_dump(struct igb_adapter *adapter) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, buffer_info->skb->data, - buffer_info->length, true); + dma_unmap_len(buffer_info, len), + true); } } @@ -3198,20 +3199,20 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring, { if (tx_buffer->skb) { dev_kfree_skb_any(tx_buffer->skb); - if (tx_buffer->dma) + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } else if (tx_buffer->dma) { + } else if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; - tx_buffer->dma = 0; + dma_unmap_len_set(tx_buffer, len, 0); /* buffer_info must be completely set up in the transmit path */ } @@ -4206,7 +4207,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, const u8 hdr_len) { struct sk_buff *skb = first->skb; - struct igb_tx_buffer *tx_buffer_info; + struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; dma_addr_t dma; struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; @@ -4227,8 +4228,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, goto dma_error; /* record length, and DMA address */ - first->length = size; - first->dma = dma; + dma_unmap_len_set(first, len, size); + dma_unmap_addr_set(first, dma, dma); tx_desc->read.buffer_addr = cpu_to_le64(dma); for (;;) { @@ -4270,9 +4271,9 @@ static void igb_tx_map(struct igb_ring *tx_ring, if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = size; - tx_buffer_info->dma = dma; + tx_buffer = &tx_ring->tx_buffer_info[i]; + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); tx_desc->read.olinfo_status = 0; tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -4323,9 +4324,9 @@ static void igb_tx_map(struct igb_ring *tx_ring, /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; @@ -5716,18 +5717,19 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; /* unmap skb header data */ dma_unmap_single(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { - tx_buffer->dma = 0; - tx_buffer++; tx_desc++; i++; @@ -5738,17 +5740,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } /* unmap any remaining paged data */ - if (tx_buffer->dma) { + if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); } } - /* clear last DMA location */ - tx_buffer->dma = 0; - /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; tx_desc++;