linux-stable/drivers/net/ethernet/intel/ice/ice_ethtool.c

3846 lines
110 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */
/* ethtool support for ice */
#include "ice.h"
#include "ice_flow.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
/* Describes one ethtool statistic: the name shown to the user and where
 * the counter lives inside its owning structure.
 */
struct ice_stats {
/* name copied into the ethtool string table */
char stat_string[ETH_GSTRING_LEN];
/* size in bytes of the backing structure member */
int sizeof_stat;
/* byte offset of the member within its structure */
int stat_offset;
};
/* Build a struct ice_stats initializer for member @_stat of structure type
 * @_type, labelled with the string @_name.
 */
#define ICE_STAT(_type, _name, _stat) { \
.stat_string = _name, \
.sizeof_stat = sizeof_field(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
/* struct ice_stats entry for a member of struct ice_vsi */
#define ICE_VSI_STAT(_name, _stat) \
ICE_STAT(struct ice_vsi, _name, _stat)
/* struct ice_stats entry for a member of struct ice_pf */
#define ICE_PF_STAT(_name, _stat) \
ICE_STAT(struct ice_pf, _name, _stat)
static int ice_q_stats_len(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. 
This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Tony Brelinski <tonyx.brelinski@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-08-09 13:28:54 +00:00
return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
(sizeof(struct ice_q_stats) / sizeof(u64)));
}
/* Number of entries in ice_gstrings_pf_stats */
#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats)
/* Number of entries in ice_gstrings_vsi_stats */
#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
/* Combined size of the four priority xon/xoff members of the PF stats,
 * expressed as a count of u64 values.
 */
#define ICE_PFC_STATS_LEN ( \
(sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \
sizeof_field(struct ice_pf, stats.priority_xon_rx) + \
sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \
sizeof_field(struct ice_pf, stats.priority_xon_tx)) \
/ sizeof(u64))
/* Total statistics count for netdev @n: PF + PFC + VSI + per-queue stats */
#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
ICE_VSI_STATS_LEN + ice_q_stats_len(n))
/* Per-VSI statistics exposed through ethtool. Each entry pairs the string
 * shown to the user with the struct ice_vsi member that holds the counter.
 * ice_get_strings() emits the names in array order.
 */
static const struct ice_stats ice_gstrings_vsi_stats[] = {
ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
/* reported as "rx_dropped" but backed by the rx_discards counter */
ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
ICE_VSI_STAT("tx_linearize", tx_linearize),
};
/* Index of each self-test result in the data array filled by
 * ice_self_test(). The order matches the names in ice_gstrings_test.
 */
enum ice_ethtool_test_id {
ICE_ETH_TEST_REG = 0,
ICE_ETH_TEST_EEPROM,
ICE_ETH_TEST_INTR,
ICE_ETH_TEST_LOOP,
ICE_ETH_TEST_LINK,
};
/* Self-test names, one ETH_GSTRING_LEN-sized slot per test, listed in the
 * same order as enum ice_ethtool_test_id.
 */
static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
"Register test (offline)",
"EEPROM test (offline)",
"Interrupt test (offline)",
"Loopback test (offline)",
"Link test (on/offline)",
};
/* Number of self-tests: total table size divided by the size of one slot */
#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
 * but they aren't. This device is capable of supporting multiple
 * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
 * netdevs whereas the PF_STATs are for the physical function that's
 * hosting these netdevs.
 *
 * The PF_STATs are appended to the netdev stats only when ethtool -S
 * is queried on the base PF netdev.
 *
 * Every name in this table carries a ".nic" suffix; each entry maps that
 * name to a member of struct ice_pf.
 */
static const struct ice_stats ice_gstrings_pf_stats[] = {
ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
/* unlike its neighbours, this counter is a direct member of struct ice_pf */
ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
};
/* Register offsets captured by ice_get_regs(). The dump buffer holds one
 * u32 per entry, in this order; ice_get_regs_len() reports the size of
 * this table as the dump length.
 */
static const u32 ice_regs_dump_list[] = {
PFGEN_STATE,
PRTGEN_STATUS,
QRX_CTRL(0),
QINT_TQCTL(0),
QINT_RQCTL(0),
PFINT_OICR_ENA,
QRX_ITR(0),
PF0INT_ITR_0(0),
PF0INT_ITR_1(0),
PF0INT_ITR_2(0),
};
/* Describes one ethtool private flag: its user-visible name and the bit
 * that backs it.
 */
struct ice_priv_flag {
/* name of the private flag */
char name[ETH_GSTRING_LEN];
u32 bitno; /* bit position in pf->flags */
};
/* Build a struct ice_priv_flag initializer from a name and a bit number */
#define ICE_PRIV_FLAG(_name, _bitno) { \
.name = _name, \
.bitno = _bitno, \
}
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
ice: introduce legacy Rx flag Add an ethtool "legacy-rx" priv flag for toggling the Rx path. This control knob will be mainly used for build_skb usage as well as buffer size/MTU manipulation. In preparation for adding build_skb support in a way that it takes care of how we set the values of max_frame and rx_buf_len fields of struct ice_vsi. Specifically, in this patch mentioned fields are set to values that will allow us to provide headroom and tailroom in-place. This can be mostly broken down onto following: - for legacy-rx "on" ethtool control knob, old behaviour is kept; - for standard 1500 MTU size configure the buffer of size 1536, as network stack is expecting the NET_SKB_PAD to be provided and NET_IP_ALIGN can have a non-zero value (these can be typically equal to 32 and 2, respectively); - for larger MTUs go with max_frame set to 9k and configure the 3k buffer in case when PAGE_SIZE of underlying arch is less than 8k; 3k buffer is implying the need for order 1 page, so that our page recycling scheme can still be applied; With that said, substitute the hardcoded ICE_RXBUF_2048 and PAGE_SIZE values in DMA API that we're making use of with rx_ring->rx_buf_len and ice_rx_pg_size(rx_ring). The latter is an introduced helper for determining the page size based on its order (which was figured out via ice_rx_pg_order). Last but not least, take care of truesize calculation. In the followup patch the headroom/tailroom computation logic will be introduced. This change aligns the buffer and frame configuration with other Intel drivers, most importantly with iavf. Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-10-24 08:11:22 +00:00
ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
/**
 * ice_get_drvinfo - report driver, firmware and bus identification
 * @netdev: network interface device structure
 * @drvinfo: ethtool driver info structure to fill in
 *
 * Fills in the driver name, driver version, PCI bus name, a firmware
 * version string built from the NVM/OEM version fields, and the number
 * of private flags.
 */
static void
ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_pf *pf = priv->vsi->back;
	struct ice_hw *hw = &pf->hw;
	u8 nvm_major, nvm_minor;
	u8 oem_major, oem_fix;
	u16 oem_bld;

	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version));
	strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
		sizeof(drvinfo->bus_info));

	/* The NVM version is shown in place of a plain firmware version:
	 * the firmware version can be derived from it and it carries more
	 * pertinent information.
	 */
	ice_get_nvm_version(hw, &oem_major, &oem_bld, &oem_fix,
			    &nvm_major, &nvm_minor);
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
		 "%x.%02x 0x%x %d.%d.%d", nvm_major, nvm_minor,
		 hw->nvm.eetrack, oem_major, oem_bld, oem_fix);

	drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
}
/**
 * ice_get_regs_len - size of the register dump produced by ice_get_regs
 * @netdev: network interface device structure (unused)
 *
 * Returns the dump size in bytes: one entry of ice_regs_dump_list per
 * dumped register.
 */
static int ice_get_regs_len(struct net_device __always_unused *netdev)
{
	return ARRAY_SIZE(ice_regs_dump_list) * sizeof(ice_regs_dump_list[0]);
}
/**
 * ice_get_regs - dump the registers listed in ice_regs_dump_list
 * @netdev: network interface device structure
 * @regs: ethtool regs structure; its version field is set to 1
 * @p: output buffer, filled with one u32 per listed register
 */
static void
ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_hw *hw = &priv->vsi->back->hw;
	u32 *out = (u32 *)p;
	int idx = 0;

	regs->version = 1;

	while (idx < ARRAY_SIZE(ice_regs_dump_list)) {
		out[idx] = rd32(hw, ice_regs_dump_list[idx]);
		idx++;
	}
}
/**
 * ice_get_msglevel - report the driver message level
 * @netdev: network interface device structure
 *
 * When built without CONFIG_DYNAMIC_DEBUG, a non-zero hardware debug mask
 * is additionally logged.
 *
 * Returns the PF's msg_enable value.
 */
static u32 ice_get_msglevel(struct net_device *netdev)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_pf *pf = priv->vsi->back;

#ifndef CONFIG_DYNAMIC_DEBUG
	if (pf->hw.debug_mask != 0)
		netdev_info(netdev, "hw debug_mask: 0x%llX\n",
			    pf->hw.debug_mask);
#endif /* !CONFIG_DYNAMIC_DEBUG */

	return pf->msg_enable;
}
/**
 * ice_set_msglevel - set the driver message level
 * @netdev: network interface device structure
 * @data: new message level
 *
 * When built without CONFIG_DYNAMIC_DEBUG, a value with ICE_DBG_USER set
 * is stored as the hardware debug mask instead of the message level.
 */
static void ice_set_msglevel(struct net_device *netdev, u32 data)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_pf *pf = priv->vsi->back;

#ifndef CONFIG_DYNAMIC_DEBUG
	if (data & ICE_DBG_USER) {
		pf->hw.debug_mask = data;
		return;
	}
#endif /* !CONFIG_DYNAMIC_DEBUG */

	pf->msg_enable = data;
}
static int ice_get_eeprom_len(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
return (int)(pf->hw.nvm.sr_words * sizeof(u16));
}
/**
 * ice_get_eeprom - read a byte range of the EEPROM for ethtool
 * @netdev: network interface device structure
 * @eeprom: ethtool request; offset and len select the range, magic is set
 *          from the vendor and device IDs
 * @bytes: output buffer receiving eeprom->len bytes
 *
 * The byte range is widened to whole 16-bit words, read into a temporary
 * buffer with ice_read_sr_buf(), and the requested bytes are copied out.
 * On a read failure eeprom->len is rewritten from the word count left in
 * place by ice_read_sr_buf().
 *
 * Returns 0 on success, -ENOMEM if the temporary buffer cannot be
 * allocated, -EIO if the read fails.
 */
static int
ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
	       u8 *bytes)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_pf *pf = priv->vsi->back;
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	u16 word_lo, word_hi, word_cnt;
	enum ice_status err;
	u16 *words;
	int ret;

	eeprom->magic = hw->vendor_id | (hw->device_id << 16);

	/* widen the byte range to the words that contain it */
	word_lo = eeprom->offset >> 1;
	word_hi = (eeprom->offset + eeprom->len - 1) >> 1;
	word_cnt = word_hi - word_lo + 1;

	words = devm_kcalloc(dev, word_cnt, sizeof(u16), GFP_KERNEL);
	if (!words)
		return -ENOMEM;

	err = ice_read_sr_buf(hw, word_lo, &word_cnt, words);
	if (err) {
		dev_err(dev, "ice_read_sr_buf failed, err %d aq_err %d\n",
			err, hw->adminq.sq_last_status);
		eeprom->len = sizeof(u16) * word_cnt;
		ret = -EIO;
	} else {
		/* skip the leading byte when the request starts mid-word */
		memcpy(bytes, (u8 *)words + (eeprom->offset & 1), eeprom->len);
		ret = 0;
	}

	devm_kfree(dev, words);
	return ret;
}
/**
* ice_active_vfs - check if there are any active VFs
* @pf: board private structure
*
* Returns true if an active VF is found, otherwise returns false
*/
static bool ice_active_vfs(struct ice_pf *pf)
{
int i;
ice_for_each_vf(pf, i) {
struct ice_vf *vf = &pf->vf[i];
if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
return true;
}
return false;
}
/**
* ice_link_test - perform a link test on a given net_device
* @netdev: network interface device structure
*
* This function performs one of the self-tests required by ethtool.
* Returns 0 on success, non-zero on failure.
*/
static u64 ice_link_test(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
enum ice_status status;
bool link_up = false;
netdev_info(netdev, "link test\n");
status = ice_get_link_status(np->vsi->port_info, &link_up);
if (status) {
netdev_err(netdev, "link query error, status = %d\n", status);
return 1;
}
if (!link_up)
return 2;
return 0;
}
/**
 * ice_eeprom_test - ethtool self-test: validate the NVM checksum
 * @netdev: network interface device structure
 *
 * Returns 0 if ice_nvm_validate_checksum() reports success, 1 otherwise.
 */
static u64 ice_eeprom_test(struct net_device *netdev)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_hw *hw = &priv->vsi->back->hw;

	netdev_info(netdev, "EEPROM test\n");

	return ice_nvm_validate_checksum(hw) ? 1 : 0;
}
/**
 * ice_reg_pattern_test - write/read-back test of a single register
 * @hw: pointer to the HW struct
 * @reg: reg to be tested
 * @mask: bits to be touched
 *
 * Writes each test pattern (ANDed with @mask) to @reg and checks that the
 * same value reads back. The original register value is written back
 * afterwards, both on success and (best effort) when a pattern fails, so
 * a failed test does not leave a test pattern in the register.
 *
 * Returns 0 on success, 1 if a pattern or the final restore did not read
 * back as written.
 */
static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
{
	struct ice_pf *pf = (struct ice_pf *)hw->back;
	struct device *dev = ice_pf_to_dev(pf);
	static const u32 patterns[] = {
		0x5A5A5A5A, 0xA5A5A5A5,
		0x00000000, 0xFFFFFFFF
	};
	u32 val, orig_val;
	int i;

	orig_val = rd32(hw, reg);
	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
		u32 pattern = patterns[i] & mask;

		wr32(hw, reg, pattern);
		val = rd32(hw, reg);
		if (val == pattern)
			continue;
		dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
			, __func__, reg, pattern, val);
		/* best effort: do not leave the test pattern behind */
		wr32(hw, reg, orig_val);
		return 1;
	}

	wr32(hw, reg, orig_val);
	val = rd32(hw, reg);
	if (val != orig_val) {
		dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
			, __func__, reg, orig_val, val);
		return 1;
	}

	return 0;
}
/**
 * ice_reg_test - perform a register test on a given net_device
 * @netdev: network interface device structure
 *
 * This function performs one of the self-tests required by ethtool.
 * Every table entry describes a register array: base address, writable
 * bit mask, number of elements and the stride between elements. Each
 * element is run through ice_reg_pattern_test().
 *
 * Returns 0 on success, non-zero on failure.
 */
static u64 ice_reg_test(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_hw *hw = np->vsi->port_info->hw;
	/* one element less than the MSI-X vector count, or a single
	 * element when no vector count is reported
	 */
	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
	struct ice_diag_reg_test_info {
		u32 address;
		u32 mask;
		u32 elem_num;
		u32 elem_size;
	} ice_reg_list[] = {
		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
			GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
			GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
		/* base address must use ITR index 2, matching the stride;
		 * GLINT_ITR(0, 0) here would test index 0 a second time
		 * and never touch index 2
		 */
		{GLINT_ITR(2, 0), 0x00000fff, int_elements,
			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
		{GLINT_CTL, 0xffff0001, 1, 0}
	};
	int i;

	netdev_dbg(netdev, "Register test\n");
	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
		u32 j;

		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
			u32 mask = ice_reg_list[i].mask;
			u32 reg = ice_reg_list[i].address +
				(j * ice_reg_list[i].elem_size);

			/* bail on failure (non-zero return) */
			if (ice_reg_pattern_test(hw, reg, mask))
				return 1;
		}
	}

	return 0;
}
/**
 * ice_lbtest_prepare_rings - configure Tx/Rx test rings
 * @vsi: pointer to the VSI structure
 *
 * Function configures rings of a VSI for loopback test without
 * enabling interrupts or informing the kernel about new queues.
 * The steps run in this order: set up the Tx rings, set up the Rx rings,
 * configure the VSI, start all Rx rings. The first failing step jumps to
 * the error labels at the end, which undo part of the earlier work.
 *
 * Returns 0 on success, negative on failure.
 */
static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
{
int status;
status = ice_vsi_setup_tx_rings(vsi);
if (status)
goto err_setup_tx_ring;
status = ice_vsi_setup_rx_rings(vsi);
if (status)
goto err_setup_rx_ring;
/* NOTE(review): a failure here jumps to err_setup_rx_ring, which does not
 * call ice_vsi_free_rx_rings(). Confirm the Rx rings set up above are
 * released elsewhere (the caller goes on to ice_vsi_release()).
 */
status = ice_vsi_cfg(vsi);
if (status)
goto err_setup_rx_ring;
status = ice_vsi_start_all_rx_rings(vsi);
if (status)
goto err_start_rx_ring;
/* success: status is 0 here */
return status;
/* Error unwinding: each label falls through to the ones below it */
err_start_rx_ring:
ice_vsi_free_rx_rings(vsi);
err_setup_rx_ring:
ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
err_setup_tx_ring:
ice_vsi_free_tx_rings(vsi);
return status;
}
/**
 * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
 * @vsi: pointer to the VSI structure
 *
 * Function stops and frees VSI rings after a loopback test. Both stop
 * operations and both free operations are always attempted, even if an
 * earlier step failed.
 *
 * Returns 0 on success; otherwise the first error reported while stopping
 * the Tx or Rx rings (a Tx error is not masked by a successful Rx stop).
 */
static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
{
	int status;
	int ret = 0;

	status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
	if (status) {
		netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
			   vsi->vsi_num, status);
		ret = status;
	}

	status = ice_vsi_stop_all_rx_rings(vsi);
	if (status) {
		netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
			   vsi->vsi_num, status);
		/* keep the first failure as the return value */
		if (!ret)
			ret = status;
	}

	ice_vsi_free_tx_rings(vsi);
	ice_vsi_free_rx_rings(vsi);

	return ret;
}
/**
 * ice_lbtest_create_frame - create test packet
 * @pf: pointer to the PF structure
 * @ret_data: allocated frame buffer
 * @size: size of the packet data
 *
 * Function allocates a frame filled with 0xFF and places a test pattern
 * (0xDE, 0xAD, 0xBE, 0xEF) at offsets 32, 42, 44 and 46. The receive side
 * additionally expects the 0xFF filler at offset 48, so @size must be
 * large enough to contain that offset.
 *
 * The buffer is device-managed; the caller releases it with devm_kfree().
 *
 * Returns 0 on success, -EINVAL on a NULL argument or a @size too small
 * for the pattern, -ENOMEM if the allocation fails.
 */
static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
{
	/* highest frame offset inspected by ice_lbtest_check_frame() */
	const u16 last_pattern_off = 48;
	u8 *data;

	if (!pf || !ret_data)
		return -EINVAL;

	/* the pattern offsets below are fixed; refuse frames that cannot
	 * hold them rather than writing past the allocation
	 */
	if (size <= last_pattern_off)
		return -EINVAL;

	data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	/* Since the ethernet test frame should always be at least
	 * 64 bytes long, fill some octets in the payload with test data.
	 */
	memset(data, 0xFF, size);
	data[32] = 0xDE;
	data[42] = 0xAD;
	data[44] = 0xBE;
	data[46] = 0xEF;

	*ret_data = data;

	return 0;
}
/**
 * ice_lbtest_check_frame - verify received loopback frame
 * @frame: pointer to the raw packet data
 *
 * Checks the bytes at offsets 32, 42, 44 and 46 against the test pattern
 * and the byte at offset 48 against the 0xFF filler. The offsets are
 * checked in ascending order and the first mismatch ends the check.
 *
 * Returns true if frame matches the pattern, false otherwise.
 */
static bool ice_lbtest_check_frame(u8 *frame)
{
	if (frame[32] != 0xDE)
		return false;
	if (frame[42] != 0xAD)
		return false;
	if (frame[44] != 0xBE)
		return false;
	if (frame[46] != 0xEF)
		return false;

	return frame[48] == 0xFF;
}
/**
 * ice_diag_send - send test frames to the test ring
 * @tx_ring: pointer to the transmit ring
 * @data: pointer to the raw packet data
 * @size: size of the packet to send
 *
 * Function sends loopback packets on a test Tx ring. Each call fills the
 * single descriptor at next_to_use, advances next_to_use (wrapping at the
 * ring size), writes the new index to the ring tail, then sleeps 1-2 ms
 * before unmapping the buffer.
 *
 * Returns 0 on success, -EINVAL if @data could not be DMA-mapped.
 */
static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
{
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
u64 td_cmd;
/* descriptor and software buffer entry at the current producer index */
tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(tx_ring->dev, dma))
return -EINVAL;
tx_desc->buf_addr = cpu_to_le64(dma);
/* These flags are required for a descriptor to be pushed out */
td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
/* data descriptor: command flags and buffer size set, offset and
 * L2 tag fields left at zero
 */
tx_desc->cmd_type_offset_bsz =
cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
(td_cmd << ICE_TXD_QW1_CMD_S) |
((u64)0 << ICE_TXD_QW1_OFFSET_S) |
((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
((u64)0 << ICE_TXD_QW1_L2TAG1_S));
tx_buf->next_to_watch = tx_desc;
/* Force memory write to complete before letting h/w know
 * there are new descriptors to fetch.
 */
wmb();
/* advance the producer index, wrapping at the end of the ring */
tx_ring->next_to_use++;
if (tx_ring->next_to_use >= tx_ring->count)
tx_ring->next_to_use = 0;
/* publish the new producer index through the ring tail */
writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
/* Wait until the packets get transmitted to the receive queue. */
usleep_range(1000, 2000);
/* the mapping is released after the fixed wait above; no completion
 * status is checked here
 */
dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
return 0;
}
/* Size in bytes of the frame created and sent by the loopback test */
#define ICE_LB_FRAME_SIZE 64
/**
 * ice_lbtest_receive_frames - receive and verify test frames
 * @rx_ring: pointer to the receive ring
 *
 * Scans every descriptor of the ring. Descriptors whose write-back status
 * has none of the tested bits set are skipped; for the others the
 * attached buffer is checked with ice_lbtest_check_frame().
 *
 * NOTE(review): the status mask is built from Tx descriptor command
 * constants although it is applied to an Rx status word; presumably the
 * bit positions coincide with the Rx "done"/"end of frame" bits - confirm.
 *
 * Returns number of received valid frames.
 */
static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
{
	int good = 0;
	int idx;

	for (idx = 0; idx < rx_ring->count; idx++) {
		union ice_32b_rx_flex_desc *desc = ICE_RX_DESC(rx_ring, idx);

		if (desc->wb.status_error0 &
		    cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)) {
			struct ice_rx_buf *buf = &rx_ring->rx_buf[idx];
			u8 *payload = page_address(buf->page) +
				      buf->page_offset;

			if (ice_lbtest_check_frame(payload))
				good++;
		}
	}

	return good;
}
/**
 * ice_loopback_test - perform a loopback test on a given net_device
 * @netdev: network interface device structure
 *
 * This function performs one of the self-tests required by ethtool.
 * A temporary test VSI is created, its rings are prepared, MAC loopback
 * is enabled, a broadcast MAC filter is added, and up to 32 test frames
 * are sent and then looked for on the Rx ring. All resources are released
 * again in reverse order through the labels at the end.
 *
 * Returns 0 on success, non-zero on failure. The value identifies the
 * failing step:
 *  1 - test VSI could not be created
 *  2 - rings could not be prepared
 *  3 - Rx buffers could not be allocated
 *  4 - MAC loopback could not be enabled
 *  5 - broadcast address could not be added to the filter list
 *  6 - MAC filter could not be programmed
 *  7 - test frame could not be created
 *  8 - a frame could not be sent
 *  9 - no valid frame was received
 * 10 - fewer (or more) valid frames were received than were sent
 */
static u64 ice_loopback_test(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
struct ice_pf *pf = orig_vsi->back;
struct ice_ring *tx_ring, *rx_ring;
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
LIST_HEAD(tmp_list);
struct device *dev;
u8 *tx_frame;
int i;
dev = ice_pf_to_dev(pf);
netdev_info(netdev, "loopback test\n");
test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
if (!test_vsi) {
netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
return 1;
}
/* the test VSI borrows this netdev while the test runs; the pointer is
 * cleared again before the VSI is released
 */
test_vsi->netdev = netdev;
/* the test uses only the first Tx and Rx ring of the test VSI */
tx_ring = test_vsi->tx_rings[0];
rx_ring = test_vsi->rx_rings[0];
if (ice_lbtest_prepare_rings(test_vsi)) {
ret = 2;
goto lbtest_vsi_close;
}
if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
ret = 3;
goto lbtest_rings_dis;
}
/* Enable MAC loopback in firmware */
if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
ret = 4;
goto lbtest_mac_dis;
}
/* Test VSI needs to receive broadcast packets */
eth_broadcast_addr(broadcast);
if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
ret = 5;
goto lbtest_mac_dis;
}
if (ice_add_mac(&pf->hw, &tmp_list)) {
ret = 6;
goto free_mac_list;
}
if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
ret = 7;
goto remove_mac_filters;
}
/* send at most 32 frames, fewer if the Tx ring is smaller */
num_frames = min_t(int, tx_ring->count, 32);
for (i = 0; i < num_frames; i++) {
if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
ret = 8;
goto lbtest_free_frame;
}
}
valid_frames = ice_lbtest_receive_frames(rx_ring);
if (!valid_frames)
ret = 9;
else if (valid_frames != num_frames)
ret = 10;
/* Cleanup: the success path also runs through every label below */
lbtest_free_frame:
devm_kfree(dev, tx_frame);
remove_mac_filters:
if (ice_remove_mac(&pf->hw, &tmp_list))
netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
free_mac_list:
ice_free_fltr_list(dev, &tmp_list);
lbtest_mac_dis:
/* Disable MAC loopback after the test is completed. */
if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
netdev_err(netdev, "Could not disable MAC loopback\n");
lbtest_rings_dis:
if (ice_lbtest_disable_rings(test_vsi))
netdev_err(netdev, "Could not disable test rings\n");
lbtest_vsi_close:
test_vsi->netdev = NULL;
if (ice_vsi_release(test_vsi))
netdev_err(netdev, "Failed to remove the test VSI\n");
return ret;
}
/**
* ice_intr_test - perform an interrupt test on a given net_device
* @netdev: network interface device structure
*
* This function performs one of the self-tests required by ethtool.
* Returns 0 on success, non-zero on failure.
*/
static u64 ice_intr_test(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
u16 swic_old = pf->sw_int_count;
netdev_info(netdev, "interrupt test\n");
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
GLINT_DYN_CTL_SW_ITR_INDX_M |
GLINT_DYN_CTL_INTENA_MSK_M |
GLINT_DYN_CTL_SWINT_TRIG_M);
usleep_range(1000, 2000);
return (swic_old == pf->sw_int_count);
}
/**
 * ice_self_test - handler function for performing a self-test by ethtool
 * @netdev: network interface device structure
 * @eth_test: ethtool_test structure
 * @data: required by ethtool.self_test
 *
 * This function is called after invoking 'ethtool -t devname' command where
 * devname is the name of the network device on which ethtool should operate.
 * It performs a set of self-tests to check if a device works properly.
 *
 * @data is indexed by enum ice_ethtool_test_id; each slot receives the
 * result of one test (0 means pass). ETH_TEST_FL_FAILED is set in
 * @eth_test->flags if any test fails.
 *
 * Offline mode is selected only when @eth_test->flags equals
 * ETH_TEST_FL_OFFLINE exactly; any other value runs the online link test
 * alone and reports the offline-only tests as passed.
 */
static void
ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
u64 *data)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
bool if_running = netif_running(netdev);
struct ice_pf *pf = np->vsi->back;
struct device *dev;
dev = ice_pf_to_dev(pf);
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
netdev_info(netdev, "offline testing starting\n");
/* mark the PF as being under test for the duration of the offline run */
set_bit(__ICE_TESTING, pf->state);
/* offline tests are refused while any VF is active: every test is
 * reported as failed and nothing is executed
 */
if (ice_active_vfs(pf)) {
dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
data[ICE_ETH_TEST_REG] = 1;
data[ICE_ETH_TEST_EEPROM] = 1;
data[ICE_ETH_TEST_INTR] = 1;
data[ICE_ETH_TEST_LOOP] = 1;
data[ICE_ETH_TEST_LINK] = 1;
eth_test->flags |= ETH_TEST_FL_FAILED;
clear_bit(__ICE_TESTING, pf->state);
goto skip_ol_tests;
}
/* If the device is online then take it offline */
if (if_running)
/* stop the interface; it is reopened after the tests */
ice_stop(netdev);
data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
if (data[ICE_ETH_TEST_LINK] ||
data[ICE_ETH_TEST_EEPROM] ||
data[ICE_ETH_TEST_LOOP] ||
data[ICE_ETH_TEST_INTR] ||
data[ICE_ETH_TEST_REG])
eth_test->flags |= ETH_TEST_FL_FAILED;
clear_bit(__ICE_TESTING, pf->state);
/* bring the interface back up if it was running before the test */
if (if_running) {
int status = ice_open(netdev);
if (status) {
dev_err(dev, "Could not open device %s, err %d\n",
pf->int_name, status);
}
}
} else {
/* Online tests */
netdev_info(netdev, "online testing starting\n");
data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
if (data[ICE_ETH_TEST_LINK])
eth_test->flags |= ETH_TEST_FL_FAILED;
/* Offline only tests, not run in online; pass by default */
data[ICE_ETH_TEST_REG] = 0;
data[ICE_ETH_TEST_EEPROM] = 0;
data[ICE_ETH_TEST_INTR] = 0;
data[ICE_ETH_TEST_LOOP] = 0;
}
skip_ol_tests:
netdev_info(netdev, "testing finished\n");
}
/**
 * ice_get_strings - copy the names of an ethtool string set into a buffer
 * @netdev: network interface device structure
 * @stringset: which string set is requested (ETH_SS_*)
 * @data: output buffer, written as consecutive ETH_GSTRING_LEN-byte slots
 *
 * Unknown string sets are silently ignored and leave @data untouched.
 */
static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	char *slot = (char *)data;
	unsigned int idx;

	/* The self-test names are stored as a ready-made table */
	if (stringset == ETH_SS_TEST) {
		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
		return;
	}

	if (stringset == ETH_SS_PRIV_FLAGS) {
		for (idx = 0; idx < ICE_PRIV_FLAG_ARRAY_SIZE; idx++) {
			snprintf(slot, ETH_GSTRING_LEN, "%s",
				 ice_gstrings_priv_flags[idx].name);
			slot += ETH_GSTRING_LEN;
		}
		return;
	}

	if (stringset != ETH_SS_STATS)
		return;

	/* NOTE(review): the emission order below presumably has to mirror the
	 * order in which ice_get_ethtool_stats() stores values - confirm when
	 * changing either side.
	 */

	/* Fixed VSI statistics */
	for (idx = 0; idx < ICE_VSI_STATS_LEN; idx++) {
		snprintf(slot, ETH_GSTRING_LEN, "%s",
			 ice_gstrings_vsi_stats[idx].stat_string);
		slot += ETH_GSTRING_LEN;
	}

	/* Two counters for every allocated Tx queue */
	ice_for_each_alloc_txq(vsi, idx) {
		snprintf(slot, ETH_GSTRING_LEN, "tx_queue_%u_packets", idx);
		slot += ETH_GSTRING_LEN;
		snprintf(slot, ETH_GSTRING_LEN, "tx_queue_%u_bytes", idx);
		slot += ETH_GSTRING_LEN;
	}

	/* Two counters for every allocated Rx queue */
	ice_for_each_alloc_rxq(vsi, idx) {
		snprintf(slot, ETH_GSTRING_LEN, "rx_queue_%u_packets", idx);
		slot += ETH_GSTRING_LEN;
		snprintf(slot, ETH_GSTRING_LEN, "rx_queue_%u_bytes", idx);
		slot += ETH_GSTRING_LEN;
	}

	/* Everything that follows is emitted for the PF VSI only */
	if (vsi->type != ICE_VSI_PF)
		return;

	for (idx = 0; idx < ICE_PF_STATS_LEN; idx++) {
		snprintf(slot, ETH_GSTRING_LEN, "%s",
			 ice_gstrings_pf_stats[idx].stat_string);
		slot += ETH_GSTRING_LEN;
	}

	/* Per-priority xon/xoff counters, Tx first ... */
	for (idx = 0; idx < ICE_MAX_USER_PRIORITY; idx++) {
		snprintf(slot, ETH_GSTRING_LEN, "tx_priority_%u_xon.nic", idx);
		slot += ETH_GSTRING_LEN;
		snprintf(slot, ETH_GSTRING_LEN, "tx_priority_%u_xoff.nic", idx);
		slot += ETH_GSTRING_LEN;
	}

	/* ... then Rx */
	for (idx = 0; idx < ICE_MAX_USER_PRIORITY; idx++) {
		snprintf(slot, ETH_GSTRING_LEN, "rx_priority_%u_xon.nic", idx);
		slot += ETH_GSTRING_LEN;
		snprintf(slot, ETH_GSTRING_LEN, "rx_priority_%u_xoff.nic", idx);
		slot += ETH_GSTRING_LEN;
	}
}
/**
 * ice_set_phys_id - handle an ethtool port identification request
 * @netdev: network interface device structure
 * @state: requested identification state
 *
 * Only ETHTOOL_ID_ACTIVE and ETHTOOL_ID_INACTIVE are accepted.
 *
 * Returns 0 on success, -EINVAL for any other @state, or -EIO when the
 * port ID LED admin queue command reports a failure.
 */
static int
ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	bool led_inactive;

	if (state == ETHTOOL_ID_ACTIVE)
		led_inactive = false;
	else if (state == ETHTOOL_ID_INACTIVE)
		led_inactive = true;
	else
		return -EINVAL;

	/* The AQ helper takes the inverse of "LED active" */
	return ice_aq_set_port_id_led(np->vsi->port_info, led_inactive, NULL) ?
	       -EIO : 0;
}
/**
* ice_set_fec_cfg - Set link FEC options
* @netdev: network interface device structure
* @req_fec: FEC mode to configure
*/
static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_aqc_set_phy_cfg_data config = { 0 };
struct ice_aqc_get_phy_caps_data *caps;
struct ice_vsi *vsi = np->vsi;
u8 sw_cfg_caps, sw_cfg_fec;
struct ice_port_info *pi;
enum ice_status status;
int err = 0;
pi = vsi->port_info;
if (!pi)
return -EOPNOTSUPP;
/* Changing the FEC parameters is not supported if not the PF VSI */
if (vsi->type != ICE_VSI_PF) {
netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
return -EOPNOTSUPP;
}
/* Get last SW configuration */
caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
caps, NULL);
if (status) {
err = -EAGAIN;
goto done;
}
/* Copy SW configuration returned from PHY caps to PHY config */
ice_copy_phy_caps_to_cfg(caps, &config);
sw_cfg_caps = caps->caps;
sw_cfg_fec = caps->link_fec_options;
/* Get toloplogy caps, then copy PHY FEC topoloy caps to PHY config */
memset(caps, 0, sizeof(*caps));
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
caps, NULL);
if (status) {
err = -EAGAIN;
goto done;
}
config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
config.link_fec_opt = caps->link_fec_options;
ice_cfg_phy_fec(&config, req_fec);
/* If FEC mode has changed, then set PHY configuration and enable AN. */
if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) !=
(sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) ||
config.link_fec_opt != sw_cfg_fec) {
if (caps->caps & ICE_AQC_PHY_AN_MODE)
config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL);
if (status)
err = -EAGAIN;
}
done:
kfree(caps);
return err;
}
/**
* ice_set_fecparam - Set FEC link options
* @netdev: network interface device structure
* @fecparam: Ethtool structure to retrieve FEC parameters
*/
static int
ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
enum ice_fec_mode fec;
switch (fecparam->fec) {
case ETHTOOL_FEC_AUTO:
fec = ICE_FEC_AUTO;
break;
case ETHTOOL_FEC_RS:
fec = ICE_FEC_RS;
break;
case ETHTOOL_FEC_BASER:
fec = ICE_FEC_BASER;
break;
case ETHTOOL_FEC_OFF:
case ETHTOOL_FEC_NONE:
fec = ICE_FEC_NONE;
break;
default:
dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n",
fecparam->fec);
return -EINVAL;
}
return ice_set_fec_cfg(netdev, fec);
}
/**
 * ice_get_fecparam - Get link FEC options
 * @netdev: network interface device structure
 * @fecparam: Ethtool structure to retrieve FEC parameters
 *
 * ->active_fec is derived from the cached link info. ->fec is built by
 * OR-ing in one bit per mode found in the PHY topology capabilities; it
 * is not cleared here, so bits already set by the caller are kept.
 *
 * Returns 0 on success, -EOPNOTSUPP if the VSI has no port info, -ENOMEM
 * on allocation failure, or -EAGAIN if the capability query fails. In the
 * last two cases ->active_fec has already been written.
 */
static int
ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_aqc_get_phy_caps_data *phy_caps;
	struct ice_port_info *pi = np->vsi->port_info;
	struct ice_link_status *link;
	int ret = 0;

	if (!pi)
		return -EOPNOTSUPP;

	/* Set FEC mode based on negotiated link info */
	link = &pi->phy.link_info;
	if (link->fec_info == ICE_AQ_LINK_25G_KR_FEC_EN)
		fecparam->active_fec = ETHTOOL_FEC_BASER;
	else if (link->fec_info == ICE_AQ_LINK_25G_RS_528_FEC_EN ||
		 link->fec_info == ICE_AQ_LINK_25G_RS_544_FEC_EN)
		fecparam->active_fec = ETHTOOL_FEC_RS;
	else
		fecparam->active_fec = ETHTOOL_FEC_OFF;

	phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
	if (!phy_caps)
		return -ENOMEM;

	if (ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
				phy_caps, NULL)) {
		ret = -EAGAIN;
		goto out_free;
	}

	/* Set supported/configured FEC modes based on PHY capability */
	if (phy_caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
		fecparam->fec |= ETHTOOL_FEC_AUTO;

	/* Any BASE-R related option bit reports BASER */
	if (phy_caps->link_fec_options &
	    (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
	     ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
	     ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN |
	     ICE_AQC_PHY_FEC_25G_KR_REQ))
		fecparam->fec |= ETHTOOL_FEC_BASER;

	/* Any RS related option bit reports RS */
	if (phy_caps->link_fec_options &
	    (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
	     ICE_AQC_PHY_FEC_25G_RS_544_REQ |
	     ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN))
		fecparam->fec |= ETHTOOL_FEC_RS;

	/* No option bits at all reports OFF */
	if (!phy_caps->link_fec_options)
		fecparam->fec |= ETHTOOL_FEC_OFF;

out_free:
	kfree(phy_caps);
	return ret;
}
/**
 * ice_get_priv_flags - report device private flags
 * @netdev: network interface device structure
 *
 * Bit N of the result corresponds to entry N of ice_gstrings_priv_flags,
 * so the string set count and the string set must stay matched with the
 * flags returned here. Add new strings for each flag to the
 * ice_gstrings_priv_flags array.
 *
 * Returns a u32 bitmap of flags.
 */
static u32 ice_get_priv_flags(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_pf *pf = np->vsi->back;
	u32 bitmap = 0;
	u32 idx;

	/* Translate each PF flag bit into its position in the table */
	for (idx = 0; idx < ICE_PRIV_FLAG_ARRAY_SIZE; idx++)
		if (test_bit(ice_gstrings_priv_flags[idx].bitno, pf->flags))
			bitmap |= BIT(idx);

	return bitmap;
}
/**
 * ice_set_priv_flags - set private flags
 * @netdev: network interface device structure
 * @flags: bit flags to be set; bit N maps to entry N of
 *	   ice_gstrings_priv_flags
 *
 * Every known private flag is set or cleared in pf->flags according to
 * @flags, then the flags that actually changed are acted upon:
 *  - ICE_FLAG_FW_LLDP_AGENT: the FW LLDP/DCBX agents are started or stopped
 *    and DCB is re-initialised. Failures of these steps are only logged.
 *  - ICE_FLAG_LEGACY_RX: the VSI is brought down and up again.
 *
 * ICE_FLAG_ETHTOOL_CTXT is set in pf->flags for the duration of the update
 * and cleared before returning.
 *
 * Returns 0 on success, or -EINVAL if @flags has any bit set at or above
 * ICE_PRIV_FLAG_ARRAY_SIZE.
 */
static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
	DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	u32 i;

	/* Valid values are 0 .. BIT(ICE_PRIV_FLAG_ARRAY_SIZE) - 1. The
	 * comparison must be ">=": a value of exactly
	 * BIT(ICE_PRIV_FLAG_ARRAY_SIZE) names a flag that does not exist and
	 * would otherwise be accepted and clear every known flag.
	 */
	if (flags >= BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
		return -EINVAL;

	dev = ice_pf_to_dev(pf);
	set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);

	/* Snapshot the flags so the changed ones can be identified below */
	bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
		const struct ice_priv_flag *priv_flag;

		priv_flag = &ice_gstrings_priv_flags[i];

		if (flags & BIT(i))
			set_bit(priv_flag->bitno, pf->flags);
		else
			clear_bit(priv_flag->bitno, pf->flags);
	}

	bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);

	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
		if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
			enum ice_status status;

			/* Disable FW LLDP engine */
			status = ice_cfg_lldp_mib_change(&pf->hw, false);

			/* If unregistering for LLDP events fails, this is
			 * not an error state, as there shouldn't be any
			 * events to respond to.
			 */
			if (status)
				dev_info(dev, "Failed to unreg for LLDP events\n");

			/* The AQ call to stop the FW LLDP agent will generate
			 * an error if the agent is already stopped.
			 */
			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
			if (status)
				dev_warn(dev, "Fail to stop LLDP agent\n");

			/* Use case for having the FW LLDP agent stopped
			 * will likely not need DCB, so failure to init is
			 * not a concern of ethtool
			 */
			status = ice_init_pf_dcb(pf, true);
			if (status)
				dev_warn(dev, "Fail to init DCB\n");
		} else {
			enum ice_status status;
			bool dcbx_agent_status;

			/* AQ command to start FW LLDP agent will return an
			 * error if the agent is already started
			 */
			status = ice_aq_start_lldp(&pf->hw, true, NULL);
			if (status)
				dev_warn(dev, "Fail to start LLDP Agent\n");

			/* AQ command to start FW DCBX agent will fail if
			 * the agent is already started
			 */
			status = ice_aq_start_stop_dcbx(&pf->hw, true,
							&dcbx_agent_status,
							NULL);
			if (status)
				dev_dbg(dev, "Failed to start FW DCBX\n");

			dev_info(dev, "FW DCBX agent is %s\n",
				 dcbx_agent_status ? "ACTIVE" : "DISABLED");

			/* Failure to configure MIB change or init DCB is not
			 * relevant to ethtool. Print notification that
			 * registration/init failed but do not return error
			 * state to ethtool
			 */
			status = ice_init_pf_dcb(pf, true);
			if (status)
				dev_dbg(dev, "Fail to init DCB\n");

			/* Remove rule to direct LLDP packets to default VSI.
			 * The FW LLDP engine will now be consuming them.
			 */
			ice_cfg_sw_lldp(vsi, false, false);

			/* Register for MIB change events */
			status = ice_cfg_lldp_mib_change(&pf->hw, true);
			if (status)
				dev_dbg(dev, "Fail to enable MIB change events\n");
		}
	}

	if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
		/* down and up VSI so that changes of Rx cfg are reflected. */
		ice_down(vsi);
		ice_up(vsi);
	}

	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
	return 0;
}
static int ice_get_sset_count(struct net_device *netdev, int sset)
{
switch (sset) {
case ETH_SS_STATS:
ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. 
This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Tony Brelinski <tonyx.brelinski@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-08-09 13:28:54 +00:00
/* The number (and order) of strings reported *must* remain
* constant for a given netdevice. This function must not
* report a different number based on run time parameters
* (such as the number of queues in use, or the setting of
* a private ethtool flag). This is due to the nature of the
* ethtool stats API.
*
* Userspace programs such as ethtool must make 3 separate
ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. 
This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Tony Brelinski <tonyx.brelinski@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-08-09 13:28:54 +00:00
* ioctl requests, one for size, one for the strings, and
* finally one for the stats. Since these cross into
* userspace, changes to the number or size could result in
ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. 
This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Tony Brelinski <tonyx.brelinski@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-08-09 13:28:54 +00:00
* undefined memory access or incorrect string<->value
* correlations for statistics.
*
* Even if it appears to be safe, changes to the size or
* order of strings will suffer from race conditions and are
* not safe.
*/
return ICE_ALL_STATS_LEN(netdev);
case ETH_SS_TEST:
return ICE_TEST_LEN;
case ETH_SS_PRIV_FLAGS:
return ICE_PRIV_FLAG_ARRAY_SIZE;
default:
return -EOPNOTSUPP;
}
}
/**
 * ice_get_ethtool_stats - fill the ethtool statistics value array
 * @netdev: network interface device structure
 * @stats: ethtool stats request (unused)
 * @data: destination array, written sequentially as u64 values
 *
 * Refreshes the PF and VSI counters and then writes, in this order: the VSI
 * statistics, a packets/bytes pair for every allocated Tx queue, a
 * packets/bytes pair for every allocated Rx queue and, for a PF VSI only,
 * the PF statistics followed by the per-priority XON/XOFF Tx and then Rx
 * counters.
 *
 * A queue whose ring pointer is NULL still produces two values (both zero),
 * so the number and order of values written does not depend on which rings
 * currently exist.
 */
static void
ice_get_ethtool_stats(struct net_device *netdev,
		      struct ethtool_stats __always_unused *stats, u64 *data)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	u64 *out = data;
	unsigned int n;

	ice_update_pf_stats(pf);
	ice_update_vsi_stats(vsi);

	/* VSI counters are located through the offset/size table and may be
	 * either 32 or 64 bits wide in the structure.
	 */
	for (n = 0; n < ICE_VSI_STATS_LEN; n++) {
		const struct ice_stats *desc = &ice_gstrings_vsi_stats[n];
		char *src = (char *)vsi + desc->stat_offset;

		if (desc->sizeof_stat == sizeof(u64))
			*out++ = *(u64 *)src;
		else
			*out++ = *(u32 *)src;
	}

	/* populate per queue stats; each ring pointer is sampled exactly once
	 * inside the RCU read-side section
	 */
	rcu_read_lock();

	ice_for_each_alloc_txq(vsi, n) {
		struct ice_ring *tx_ring = READ_ONCE(vsi->tx_rings[n]);

		*out++ = tx_ring ? tx_ring->stats.pkts : 0;
		*out++ = tx_ring ? tx_ring->stats.bytes : 0;
	}

	ice_for_each_alloc_rxq(vsi, n) {
		struct ice_ring *rx_ring = READ_ONCE(vsi->rx_rings[n]);

		*out++ = rx_ring ? rx_ring->stats.pkts : 0;
		*out++ = rx_ring ? rx_ring->stats.bytes : 0;
	}

	rcu_read_unlock();

	/* everything below is only reported for the PF VSI */
	if (vsi->type != ICE_VSI_PF)
		return;

	for (n = 0; n < ICE_PF_STATS_LEN; n++) {
		const struct ice_stats *desc = &ice_gstrings_pf_stats[n];
		char *src = (char *)pf + desc->stat_offset;

		if (desc->sizeof_stat == sizeof(u64))
			*out++ = *(u64 *)src;
		else
			*out++ = *(u32 *)src;
	}

	for (n = 0; n < ICE_MAX_USER_PRIORITY; n++) {
		*out++ = pf->stats.priority_xon_tx[n];
		*out++ = pf->stats.priority_xoff_tx[n];
	}

	for (n = 0; n < ICE_MAX_USER_PRIORITY; n++) {
		*out++ = pf->stats.priority_xon_rx[n];
		*out++ = pf->stats.priority_xoff_rx[n];
	}
}
/**
* ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
* @netdev: network interface device structure
* @ks: ethtool link ksettings struct to fill out
*/
static void
ice_phy_type_to_ethtool(struct net_device *netdev,
struct ethtool_link_ksettings *ks)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_link_status *hw_link_info;
bool need_add_adv_mode = false;
struct ice_vsi *vsi = np->vsi;
u64 phy_types_high;
u64 phy_types_low;
hw_link_info = &vsi->port_info->phy.link_info;
phy_types_low = vsi->port_info->phy.phy_type_low;
phy_types_high = vsi->port_info->phy.phy_type_high;
ethtool_link_ksettings_zero_link_mode(ks, supported);
ethtool_link_ksettings_zero_link_mode(ks, advertising);
if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100baseT_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
100baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseT_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseKX_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseKX_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_SX ||
phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseX_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseX_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseT_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
2500baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_X ||
phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseX_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
2500baseX_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
5000baseT_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
5000baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_DA ||
phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseT_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseKR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseKR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseSR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseSR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseLR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseLR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseCR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseCR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_SR ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseSR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseSR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseKR_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseKR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseKR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseKR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseCR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseCR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseSR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseSR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseLR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseLR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseCR2_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseCR2_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseKR2_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseKR2_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_FR ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseSR2_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseSR2_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC ||
phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2 ||
phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC ||
phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2 ||
phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC ||
phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseCR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
need_add_adv_mode = false;
ethtool_link_ksettings_add_link_mode(ks, advertising,
100000baseCR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseSR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
need_add_adv_mode = false;
ethtool_link_ksettings_add_link_mode(ks, advertising,
100000baseSR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_LR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseLR4_ER4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
need_add_adv_mode = false;
ethtool_link_ksettings_add_link_mode(ks, advertising,
100000baseLR4_ER4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseKR4_Full);
if (!hw_link_info->req_speeds ||
hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode)
ethtool_link_ksettings_add_link_mode(ks, advertising,
100000baseKR4_Full);
/* Autoneg PHY types */
if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX ||
phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX ||
phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR ||
phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1 ||
phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
Autoneg);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Autoneg);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
Autoneg);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Autoneg);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
ethtool_link_ksettings_add_link_mode(ks, supported,
Autoneg);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Autoneg);
}
}
/* NOTE(review): no user of these constants is visible in this part of the
 * file. The names suggest a retry budget (TIMEOUT) and the min/max bounds of
 * a sleep used while waiting on a test-and-set bit operation - confirm at the
 * call site, including the time unit of the two sleep values.
 */
#define TEST_SET_BITS_TIMEOUT 50
#define TEST_SET_BITS_SLEEP_MAX 2000
#define TEST_SET_BITS_SLEEP_MIN 1000
/**
* ice_get_settings_link_up - Get Link settings for when link is up
* @ks: ethtool ksettings to fill in
* @netdev: network interface device structure
*/
static void
ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
struct ice_port_info *pi = np->vsi->port_info;
struct ice_link_status *link_info;
struct ice_vsi *vsi = np->vsi;
link_info = &vsi->port_info->phy.link_info;
/* Get supported and advertised settings from PHY ability with media */
ice_phy_type_to_ethtool(netdev, ks);
switch (link_info->link_speed) {
case ICE_AQ_LINK_SPEED_100GB:
ks->base.speed = SPEED_100000;
break;
case ICE_AQ_LINK_SPEED_50GB:
ks->base.speed = SPEED_50000;
break;
case ICE_AQ_LINK_SPEED_40GB:
ks->base.speed = SPEED_40000;
break;
case ICE_AQ_LINK_SPEED_25GB:
ks->base.speed = SPEED_25000;
break;
case ICE_AQ_LINK_SPEED_20GB:
ks->base.speed = SPEED_20000;
break;
case ICE_AQ_LINK_SPEED_10GB:
ks->base.speed = SPEED_10000;
break;
case ICE_AQ_LINK_SPEED_5GB:
ks->base.speed = SPEED_5000;
break;
case ICE_AQ_LINK_SPEED_2500MB:
ks->base.speed = SPEED_2500;
break;
case ICE_AQ_LINK_SPEED_1000MB:
ks->base.speed = SPEED_1000;
break;
case ICE_AQ_LINK_SPEED_100MB:
ks->base.speed = SPEED_100;
break;
default:
netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n",
link_info->link_speed);
break;
}
ks->base.duplex = DUPLEX_FULL;
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
if (link_info->an_info & ICE_AQ_AN_COMPLETED)
ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
Autoneg);
/* Set flow control negotiated Rx/Tx pause */
switch (pi->fc.current_mode) {
case ICE_FC_FULL:
ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
break;
case ICE_FC_TX_PAUSE:
ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
Asym_Pause);
break;
case ICE_FC_RX_PAUSE:
ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
Asym_Pause);
break;
case ICE_FC_PFC:
default:
ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause);
ethtool_link_ksettings_del_link_mode(ks, lp_advertising,
Asym_Pause);
break;
}
}
/**
 * ice_get_settings_link_down - Get the Link settings when link is down
 * @ks: ethtool ksettings to fill in
 * @netdev: network interface device structure
 *
 * Reports what can still be determined without link: speed and duplex are
 * marked unknown, and the supported/advertised link modes are derived from
 * the PHY types.
 */
static void
ice_get_settings_link_down(struct ethtool_link_ksettings *ks,
			   struct net_device *netdev)
{
	/* With no link, speed and duplex are unknown */
	ks->base.duplex = DUPLEX_UNKNOWN;
	ks->base.speed = SPEED_UNKNOWN;

	/* the link modes can only come from the supported PHY types while
	 * the link is down
	 */
	ice_phy_type_to_ethtool(netdev, ks);
}
/**
* ice_get_link_ksettings - Get Link Speed and Duplex settings
* @netdev: network interface device structure
* @ks: ethtool ksettings
*
* Reports speed/duplex settings based on media_type
*/
static int
ice_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *ks)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_aqc_get_phy_caps_data *caps;
struct ice_link_status *hw_link_info;
struct ice_vsi *vsi = np->vsi;
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
enum ice_status status;
int err = 0;
ethtool_link_ksettings_zero_link_mode(ks, supported);
ethtool_link_ksettings_zero_link_mode(ks, advertising);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
hw_link_info = &vsi->port_info->phy.link_info;
/* set speed and duplex */
if (hw_link_info->link_info & ICE_AQ_LINK_UP)
ice_get_settings_link_up(ks, netdev);
else
ice_get_settings_link_down(ks, netdev);
/* set autoneg settings */
ks->base.autoneg = (hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
AUTONEG_ENABLE : AUTONEG_DISABLE;
/* set media type settings */
switch (vsi->port_info->phy.media_type) {
case ICE_MEDIA_FIBER:
ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
ks->base.port = PORT_FIBRE;
break;
case ICE_MEDIA_BASET:
ethtool_link_ksettings_add_link_mode(ks, supported, TP);
ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
ks->base.port = PORT_TP;
break;
case ICE_MEDIA_BACKPLANE:
ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Backplane);
ks->base.port = PORT_NONE;
break;
case ICE_MEDIA_DA:
ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
ks->base.port = PORT_DA;
break;
default:
ks->base.port = PORT_OTHER;
break;
}
/* flow control is symmetric and always supported */
ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
caps = kzalloc(sizeof(*caps), GFP_KERNEL);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
if (!caps)
return -ENOMEM;
status = ice_aq_get_phy_caps(vsi->port_info, false,
ICE_AQC_REPORT_SW_CFG, caps, NULL);
if (status) {
err = -EIO;
goto done;
}
/* Set the advertised flow control based on the PHY capability */
if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
(caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) {
ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
} else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) {
ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
} else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) {
ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
} else {
ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
ethtool_link_ksettings_del_link_mode(ks, advertising,
Asym_Pause);
}
/* Set advertised FEC modes based on PHY capability */
ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
ethtool_link_ksettings_add_link_mode(ks, advertising,
FEC_BASER);
if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
status = ice_aq_get_phy_caps(vsi->port_info, false,
ICE_AQC_REPORT_TOPO_CAP, caps, NULL);
if (status) {
err = -EIO;
goto done;
}
/* Set supported FEC modes based on PHY capability */
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
done:
kfree(caps);
ice: add lp_advertising flow control support Add support for reporting link partner advertising when ETHTOOL_GLINKSETTINGS defined. Get pause param reports the Tx/Rx pause configured, and then ethtool issues ETHTOOL_GSET ioctl and ice_get_settings_link_up reports the negotiated Tx/Rx pause. Negotiated pause frame report per IEEE 802.3-2005 table 288-3. $ ethtool --show-pause ens6f0 Pause parameters for ens6f0: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on $ ethtool ens6f0 Settings for ens6f0: Supported ports: [ FIBRE ] Supported link modes: 25000baseCR/Full Supported pause frame use: Symmetric Supports auto-negotiation: Yes Supported FEC modes: None BaseR RS Advertised link modes: 25000baseCR/Full Advertised pause frame use: Symmetric Receive-only Advertised auto-negotiation: Yes Advertised FEC modes: None BaseR RS Link partner advertised link modes: Not reported Link partner advertised pause frame use: Symmetric Link partner advertised auto-negotiation: Yes Link partner advertised FEC modes: Not reported Speed: 25000Mb/s Duplex: Full Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Auto-negotiation: on Supports Wake-on: g Wake-on: g Current message level: 0x00000007 (7) drv probe link Link detected: yes When ETHTOOL_GLINKSETTINGS is not defined, get pause param reports the negotiated Tx/Rx pause. Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-06-26 09:20:12 +00:00
return err;
}
/**
* ice_ksettings_find_adv_link_speed - Find advertising link speed
* @ks: ethtool ksettings
*/
static u16
ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
{
u16 adv_link_speed = 0;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
100baseT_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_100MB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
1000baseX_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
1000baseT_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
1000baseKX_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
2500baseT_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
2500baseX_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
5000baseT_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_5GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
10000baseT_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
10000baseKR_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
10000baseSR_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
10000baseLR_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
25000baseCR_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
25000baseSR_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
25000baseKR_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_25GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
40000baseCR4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
40000baseSR4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
40000baseLR4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
40000baseKR4_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
50000baseCR2_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
50000baseKR2_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
50000baseSR2_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
100000baseCR4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
100000baseSR4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
100000baseLR4_ER4_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
100000baseKR4_Full))
adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
return adv_link_speed;
}
/**
 * ice_setup_autoneg
 * @p: port info
 * @ks: ethtool_link_ksettings
 * @config: configuration that will be sent down to FW
 * @autoneg_enabled: autonegotiation is enabled or not
 * @autoneg_changed: will there a change in autonegotiation
 * @netdev: network interface device structure
 *
 * Setup PHY autonegotiation feature
 *
 * Compares the requested autoneg state with the ICE_AQ_AN_COMPLETED flag in
 * the port's link info. When they differ, @config->caps is updated and
 * *@autoneg_changed is set to 1, unless the change is rejected.
 *
 * Returns 0 on success (including "nothing to change") or -EINVAL when the
 * requested change is rejected.
 */
static int
ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
		  struct ice_aqc_set_phy_cfg_data *config,
		  u8 autoneg_enabled, u8 *autoneg_changed,
		  struct net_device *netdev)
{
	bool an_wanted, an_done;

	*autoneg_changed = 0;

	an_wanted = autoneg_enabled == AUTONEG_ENABLE;
	an_done = !!(p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED);

	/* requested state already matches the current one */
	if (an_wanted == an_done)
		return 0;

	if (an_wanted) {
		/* enabling requires Autoneg among the supported modes */
		if (!ethtool_link_ksettings_test_link_mode(ks, supported,
							   Autoneg)) {
			netdev_info(netdev, "Autoneg not supported on this phy.\n");
			return -EINVAL;
		}

		config->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
	} else {
		/* disabling is refused when Autoneg is a supported mode */
		if (ethtool_link_ksettings_test_link_mode(ks, supported,
							  Autoneg)) {
			netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
			return -EINVAL;
		}

		config->caps &= ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
	}

	/* Autoneg is allowed to change */
	*autoneg_changed = 1;

	return 0;
}
/**
* ice_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
* @ks: ethtool ksettings
*
* Set speed/duplex per media_types advertised/forced
*/
static int
ice_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *ks)
{
u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0;
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ethtool_link_ksettings safe_ks, copy_ks;
struct ice_aqc_get_phy_caps_data *abilities;
u16 adv_link_speed, curr_link_speed, idx;
struct ice_aqc_set_phy_cfg_data config;
struct ice_pf *pf = np->vsi->back;
struct ice_port_info *p;
u8 autoneg_changed = 0;
enum ice_status status;
u64 phy_type_high;
u64 phy_type_low;
int err = 0;
bool linkup;
p = np->vsi->port_info;
if (!p)
return -EOPNOTSUPP;
/* Check if this is LAN VSI */
ice_for_each_vsi(pf, idx)
if (pf->vsi[idx]->type == ICE_VSI_PF) {
if (np->vsi != pf->vsi[idx])
return -EOPNOTSUPP;
break;
}
if (p->phy.media_type != ICE_MEDIA_BASET &&
p->phy.media_type != ICE_MEDIA_FIBER &&
p->phy.media_type != ICE_MEDIA_BACKPLANE &&
p->phy.media_type != ICE_MEDIA_DA &&
p->phy.link_info.link_info & ICE_AQ_LINK_UP)
return -EOPNOTSUPP;
/* copy the ksettings to copy_ks to avoid modifying the original */
memcpy(&copy_ks, ks, sizeof(copy_ks));
/* save autoneg out of ksettings */
autoneg = copy_ks.base.autoneg;
memset(&safe_ks, 0, sizeof(safe_ks));
/* Get link modes supported by hardware.*/
ice_phy_type_to_ethtool(netdev, &safe_ks);
/* and check against modes requested by user.
* Return an error if unsupported mode was set.
*/
if (!bitmap_subset(copy_ks.link_modes.advertising,
safe_ks.link_modes.supported,
__ETHTOOL_LINK_MODE_MASK_NBITS))
return -EINVAL;
/* get our own copy of the bits to check against */
memset(&safe_ks, 0, sizeof(safe_ks));
safe_ks.base.cmd = copy_ks.base.cmd;
safe_ks.base.link_mode_masks_nwords =
copy_ks.base.link_mode_masks_nwords;
ice_get_link_ksettings(netdev, &safe_ks);
/* set autoneg back to what it currently is */
copy_ks.base.autoneg = safe_ks.base.autoneg;
/* we don't compare the speed */
copy_ks.base.speed = safe_ks.base.speed;
/* If copy_ks.base and safe_ks.base are not the same now, then they are
* trying to set something that we do not support.
*/
if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base)))
return -EOPNOTSUPP;
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
return -EBUSY;
usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
}
abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
if (!abilities)
return -ENOMEM;
/* Get the current PHY config */
status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_SW_CFG, abilities,
NULL);
if (status) {
err = -EAGAIN;
goto done;
}
/* Copy abilities to config in case autoneg is not set below */
memset(&config, 0, sizeof(config));
config.caps = abilities->caps & ~ICE_AQC_PHY_AN_MODE;
if (abilities->caps & ICE_AQC_PHY_AN_MODE)
config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
/* Check autoneg */
err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed,
netdev);
if (err)
goto done;
/* Call to get the current link speed */
p->phy.get_link_info = true;
status = ice_get_link_status(p, &linkup);
if (status) {
err = -EAGAIN;
goto done;
}
curr_link_speed = p->phy.link_info.link_speed;
adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
/* If speed didn't get set, set it to what it currently is.
* This is needed because if advertise is 0 (as it is when autoneg
* is disabled) then speed won't get set.
*/
if (!adv_link_speed)
adv_link_speed = curr_link_speed;
/* Convert the advertise link speeds to their corresponded PHY_TYPE */
ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
if (!autoneg_changed && adv_link_speed == curr_link_speed) {
netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
goto done;
}
/* copy over the rest of the abilities */
config.low_power_ctrl = abilities->low_power_ctrl;
config.eee_cap = abilities->eee_cap;
config.eeer_value = abilities->eeer_value;
config.link_fec_opt = abilities->link_fec_options;
/* save the requested speeds */
p->phy.link_info.req_speeds = adv_link_speed;
/* set link and auto negotiation so changes take effect */
config.caps |= ICE_AQ_PHY_ENA_LINK;
if (phy_type_low || phy_type_high) {
config.phy_type_high = cpu_to_le64(phy_type_high) &
abilities->phy_type_high;
config.phy_type_low = cpu_to_le64(phy_type_low) &
abilities->phy_type_low;
} else {
err = -EAGAIN;
netdev_info(netdev, "Nothing changed. No PHY_TYPE is corresponded to advertised link speed.\n");
goto done;
}
/* If link is up put link down */
if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) {
/* Tell the OS link is going down, the link will go
* back up when fw says it is ready asynchronously
*/
ice_print_link_msg(np->vsi, false);
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
}
/* make the aq call */
status = ice_aq_set_phy_cfg(&pf->hw, lport, &config, NULL);
if (status) {
netdev_info(netdev, "Set phy config failed,\n");
err = -EAGAIN;
}
done:
kfree(abilities);
clear_bit(__ICE_CFG_BUSY, pf->state);
return err;
}
/**
 * ice_parse_hdrs - parses headers from RSS hash input
 * @nfc: ethtool rxnfc command
 *
 * Maps the flow type of the rxnfc command to the matching pair of L4 and L3
 * segment header flags. Flow types not handled here leave the result at
 * ICE_FLOW_SEG_HDR_NONE.
 */
static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
{
	const u32 base = ICE_FLOW_SEG_HDR_NONE;

	switch (nfc->flow_type) {
	/* IPv4 flows */
	case TCP_V4_FLOW:
		return base | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4;
	case UDP_V4_FLOW:
		return base | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4;
	case SCTP_V4_FLOW:
		return base | ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4;
	/* IPv6 flows */
	case TCP_V6_FLOW:
		return base | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6;
	case UDP_V6_FLOW:
		return base | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6;
	case SCTP_V6_FLOW:
		return base | ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6;
	default:
		return base;
	}
}
/* Single-bit 64-bit masks for the individual RSS hash input fields. Each one
 * is BIT_ULL() of the corresponding ICE_FLOW_FIELD_IDX_* value: IPv4/IPv6
 * source and destination address, and TCP/UDP/SCTP source and destination
 * port.
 */
#define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)
#define ICE_FLOW_HASH_FLD_IPV6_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)
#define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)
#define ICE_FLOW_HASH_FLD_IPV6_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)
#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)
#define ICE_FLOW_HASH_FLD_TCP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)
#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)
#define ICE_FLOW_HASH_FLD_UDP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)
#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT \
BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)
#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT \
BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)
/**
 * ice_parse_hash_flds - parses hash fields from RSS hash input
 * @nfc: ethtool rxnfc command
 *
 * Translates the RXH_* bits in @nfc->data into ICE_FLOW_HASH_FLD_* bits for
 * the flow type of the command. The result starts out as ICE_HASH_INVALID
 * and is returned unchanged when the flow type is not handled or none of
 * the RXH_* bits checked here is set.
 */
static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc)
{
	u64 src_addr, dst_addr, src_port, dst_port;
	u64 hfld = ICE_HASH_INVALID;

	/* address fields depend on the IP version of the flow */
	switch (nfc->flow_type) {
	case TCP_V4_FLOW:
	case UDP_V4_FLOW:
	case SCTP_V4_FLOW:
		src_addr = ICE_FLOW_HASH_FLD_IPV4_SA;
		dst_addr = ICE_FLOW_HASH_FLD_IPV4_DA;
		break;
	case TCP_V6_FLOW:
	case UDP_V6_FLOW:
	case SCTP_V6_FLOW:
		src_addr = ICE_FLOW_HASH_FLD_IPV6_SA;
		dst_addr = ICE_FLOW_HASH_FLD_IPV6_DA;
		break;
	default:
		return hfld;
	}

	/* port fields depend on the L4 protocol of the flow */
	switch (nfc->flow_type) {
	case TCP_V4_FLOW:
	case TCP_V6_FLOW:
		src_port = ICE_FLOW_HASH_FLD_TCP_SRC_PORT;
		dst_port = ICE_FLOW_HASH_FLD_TCP_DST_PORT;
		break;
	case UDP_V4_FLOW:
	case UDP_V6_FLOW:
		src_port = ICE_FLOW_HASH_FLD_UDP_SRC_PORT;
		dst_port = ICE_FLOW_HASH_FLD_UDP_DST_PORT;
		break;
	case SCTP_V4_FLOW:
	case SCTP_V6_FLOW:
		src_port = ICE_FLOW_HASH_FLD_SCTP_SRC_PORT;
		dst_port = ICE_FLOW_HASH_FLD_SCTP_DST_PORT;
		break;
	default:
		/* not reachable: other flow types returned above */
		return hfld;
	}

	if (nfc->data & RXH_IP_SRC)
		hfld |= src_addr;
	if (nfc->data & RXH_IP_DST)
		hfld |= dst_addr;
	if (nfc->data & RXH_L4_B_0_1)
		hfld |= src_port;
	if (nfc->data & RXH_L4_B_2_3)
		hfld |= dst_port;

	return hfld;
}
/**
 * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash
 * @vsi: the VSI being configured
 * @nfc: ethtool rxnfc command
 *
 * Parses the hash fields and header types out of @nfc and adds the
 * resulting RSS configuration for @vsi.
 *
 * Returns 0 if the flow input set is supported and was added, -EINVAL
 * otherwise (safe mode, invalid hash fields or header type, or failure to
 * add the configuration).
 */
static int
ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev = ice_pf_to_dev(pf);
	enum ice_status status;
	u64 hash_flds;
	u32 seg_hdrs;

	if (ice_is_safe_mode(pf)) {
		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
			vsi->vsi_num);
		return -EINVAL;
	}

	hash_flds = ice_parse_hash_flds(nfc);
	if (hash_flds == ICE_HASH_INVALID) {
		dev_dbg(dev, "Invalid hash fields, vsi num = %d\n",
			vsi->vsi_num);
		return -EINVAL;
	}

	seg_hdrs = ice_parse_hdrs(nfc);
	if (seg_hdrs == ICE_FLOW_SEG_HDR_NONE) {
		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
			vsi->vsi_num);
		return -EINVAL;
	}

	status = ice_add_rss_cfg(&pf->hw, vsi->idx, hash_flds, seg_hdrs);
	if (!status)
		return 0;

	dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n",
		vsi->vsi_num, status);
	return -EINVAL;
}
/**
 * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type
 * @vsi: the VSI being configured
 * @nfc: ethtool rxnfc command
 *
 * Clears @nfc->data and then sets the RXH_* bits matching the hash fields
 * configured for the flow type in @nfc. @nfc->data stays 0 in safe mode,
 * for an invalid header type, or when no hash fields are found.
 */
static void
ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev = ice_pf_to_dev(pf);
	u64 flds;
	u32 seg_hdrs;

	nfc->data = 0;

	if (ice_is_safe_mode(pf)) {
		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
			vsi->vsi_num);
		return;
	}

	seg_hdrs = ice_parse_hdrs(nfc);
	if (seg_hdrs == ICE_FLOW_SEG_HDR_NONE) {
		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
			vsi->vsi_num);
		return;
	}

	flds = ice_get_rss_cfg(&pf->hw, vsi->idx, seg_hdrs);
	if (flds == ICE_HASH_INVALID) {
		dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n",
			vsi->vsi_num);
		return;
	}

	/* source address, either IP version */
	if (flds & (ICE_FLOW_HASH_FLD_IPV4_SA | ICE_FLOW_HASH_FLD_IPV6_SA))
		nfc->data |= (u64)RXH_IP_SRC;

	/* destination address, either IP version */
	if (flds & (ICE_FLOW_HASH_FLD_IPV4_DA | ICE_FLOW_HASH_FLD_IPV6_DA))
		nfc->data |= (u64)RXH_IP_DST;

	/* source port of any supported L4 protocol */
	if (flds & (ICE_FLOW_HASH_FLD_TCP_SRC_PORT |
		    ICE_FLOW_HASH_FLD_UDP_SRC_PORT |
		    ICE_FLOW_HASH_FLD_SCTP_SRC_PORT))
		nfc->data |= (u64)RXH_L4_B_0_1;

	/* destination port of any supported L4 protocol */
	if (flds & (ICE_FLOW_HASH_FLD_TCP_DST_PORT |
		    ICE_FLOW_HASH_FLD_UDP_DST_PORT |
		    ICE_FLOW_HASH_FLD_SCTP_DST_PORT))
		nfc->data |= (u64)RXH_L4_B_2_3;
}
/**
 * ice_set_rxnfc - command to set Rx flow rules.
 * @netdev: network interface device structure
 * @cmd: ethtool rxnfc command
 *
 * Only ETHTOOL_SRXFH is handled; it is forwarded to ice_set_rss_hash_opt().
 *
 * Returns 0 for success and negative values for errors; -EOPNOTSUPP for
 * any other command.
 */
static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;

	if (cmd->cmd == ETHTOOL_SRXFH)
		return ice_set_rss_hash_opt(vsi, cmd);

	return -EOPNOTSUPP;
}
/**
 * ice_get_rxnfc - command to get Rx flow classification rules
 * @netdev: network interface device structure
 * @cmd: ethtool rxnfc command
 * @rule_locs: buffer to return Rx flow classification rules
 *
 * ETHTOOL_GRXRINGS reports the VSI's rss_size in @cmd->data and
 * ETHTOOL_GRXFH reports the hash fields for the flow type in @cmd.
 *
 * Returns 0 if the command is supported, -EOPNOTSUPP otherwise.
 */
static int
ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
	      u32 __always_unused *rule_locs)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;

	switch (cmd->cmd) {
	case ETHTOOL_GRXRINGS:
		cmd->data = vsi->rss_size;
		return 0;
	case ETHTOOL_GRXFH:
		ice_get_rss_hash_opt(vsi, cmd);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
/**
 * ice_get_ringparam - report descriptor ring sizes
 * @netdev: network interface device structure
 * @ring: ethtool ring parameters to fill in
 *
 * Reports ICE_MAX_NUM_DESC as the maximum for both directions and the
 * descriptor count of the first Tx and first Rx ring as the current values.
 */
static void
ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;

	/* Rx mini and jumbo rings are not supported */
	ring->rx_mini_pending = 0;
	ring->rx_mini_max_pending = 0;
	ring->rx_jumbo_pending = 0;
	ring->rx_jumbo_max_pending = 0;

	/* upper limits */
	ring->tx_max_pending = ICE_MAX_NUM_DESC;
	ring->rx_max_pending = ICE_MAX_NUM_DESC;

	/* current sizes, read from ring 0 of each direction */
	ring->tx_pending = vsi->tx_rings[0]->count;
	ring->rx_pending = vsi->rx_rings[0]->count;
}
/**
 * ice_set_ringparam - change the number of descriptors per Tx/Rx ring
 * @netdev: network interface device structure
 * @ring: ethtool ring parameters requested by the user
 *
 * The requested counts must lie within [ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC]
 * and are rounded up to a multiple of ICE_REQ_DESC_MULTIPLE. When the netdev
 * is not running only the new counts are recorded. Otherwise replacement
 * rings are allocated first and, only when every allocation succeeded, the
 * VSI is brought down, switched to the new rings and brought back up.
 *
 * Returns 0 on success, -EINVAL for out-of-range counts, -EBUSY if an AF_XDP
 * UMEM is attached to an Rx ring or __ICE_CFG_BUSY could not be acquired,
 * -ENOMEM on allocation failure, or the error of ice_setup_tx_ring().
 */
static int
ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
	struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_ring *xdp_rings = NULL;
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	int i, timeout = 50, err = 0;
	u32 new_rx_cnt, new_tx_cnt;

	if (ring->tx_pending > ICE_MAX_NUM_DESC ||
	    ring->tx_pending < ICE_MIN_NUM_DESC ||
	    ring->rx_pending > ICE_MAX_NUM_DESC ||
	    ring->rx_pending < ICE_MIN_NUM_DESC) {
		netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
			   ring->tx_pending, ring->rx_pending,
			   ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC,
			   ICE_REQ_DESC_MULTIPLE);
		return -EINVAL;
	}

	new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
	if (new_tx_cnt != ring->tx_pending)
		netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
			    new_tx_cnt);
	new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
	if (new_rx_cnt != ring->rx_pending)
		netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
			    new_rx_cnt);

	/* if nothing to do return success */
	if (new_tx_cnt == vsi->tx_rings[0]->count &&
	    new_rx_cnt == vsi->rx_rings[0]->count) {
		netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
		return 0;
	}

	/* If there is a AF_XDP UMEM attached to any of Rx rings,
	 * disallow changing the number of descriptors -- regardless
	 * if the netdev is running or not.
	 */
	if (ice_xsk_any_rx_ring_ena(vsi))
		return -EBUSY;

	/* wait for exclusive ownership of the configuration path; every exit
	 * below this point must go through 'done' to release it again
	 */
	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
		timeout--;
		if (!timeout)
			return -EBUSY;
		usleep_range(1000, 2000);
	}

	/* set for the next time the netdev is started */
	if (!netif_running(vsi->netdev)) {
		for (i = 0; i < vsi->alloc_txq; i++)
			vsi->tx_rings[i]->count = new_tx_cnt;
		for (i = 0; i < vsi->alloc_rxq; i++)
			vsi->rx_rings[i]->count = new_rx_cnt;
		if (ice_is_xdp_ena_vsi(vsi))
			for (i = 0; i < vsi->num_xdp_txq; i++)
				vsi->xdp_rings[i]->count = new_tx_cnt;
		vsi->num_tx_desc = new_tx_cnt;
		vsi->num_rx_desc = new_rx_cnt;
		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
		goto done;
	}

	if (new_tx_cnt == vsi->tx_rings[0]->count)
		goto process_rx;

	/* alloc updated Tx resources */
	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
		    vsi->tx_rings[0]->count, new_tx_cnt);

	tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
	if (!tx_rings) {
		err = -ENOMEM;
		goto done;
	}

	ice_for_each_txq(vsi, i) {
		/* clone ring and setup updated count */
		tx_rings[i] = *vsi->tx_rings[i];
		tx_rings[i].count = new_tx_cnt;
		tx_rings[i].desc = NULL;
		tx_rings[i].tx_buf = NULL;
		err = ice_setup_tx_ring(&tx_rings[i]);
		if (err) {
			/* NOTE(review): this unwind uses ice_clean_tx_ring()
			 * while 'free_tx' uses ice_free_tx_ring() -- confirm
			 * that clean alone releases what setup allocated
			 */
			while (i--)
				ice_clean_tx_ring(&tx_rings[i]);
			kfree(tx_rings);
			goto done;
		}
	}

	if (!ice_is_xdp_ena_vsi(vsi))
		goto process_rx;

	/* alloc updated XDP resources */
	netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
		    vsi->xdp_rings[0]->count, new_tx_cnt);

	xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
	if (!xdp_rings) {
		err = -ENOMEM;
		goto free_tx;
	}

	for (i = 0; i < vsi->num_xdp_txq; i++) {
		/* clone ring and setup updated count */
		xdp_rings[i] = *vsi->xdp_rings[i];
		xdp_rings[i].count = new_tx_cnt;
		xdp_rings[i].desc = NULL;
		xdp_rings[i].tx_buf = NULL;
		err = ice_setup_tx_ring(&xdp_rings[i]);
		if (err) {
			while (i--)
				ice_clean_tx_ring(&xdp_rings[i]);
			kfree(xdp_rings);
			/* already released; keep 'free_tx' from touching it */
			xdp_rings = NULL;
			goto free_tx;
		}
		ice_set_ring_xdp(&xdp_rings[i]);
	}

process_rx:
	if (new_rx_cnt == vsi->rx_rings[0]->count)
		goto process_link;

	/* alloc updated Rx resources */
	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
		    vsi->rx_rings[0]->count, new_rx_cnt);

	rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
	if (!rx_rings) {
		err = -ENOMEM;
		/* Tx/XDP replacement rings may already exist; release them */
		goto free_tx;
	}

	ice_for_each_rxq(vsi, i) {
		/* clone ring and setup updated count */
		rx_rings[i] = *vsi->rx_rings[i];
		rx_rings[i].count = new_rx_cnt;
		rx_rings[i].desc = NULL;
		rx_rings[i].rx_buf = NULL;
		/* this is to allow wr32 to have something to write to
		 * during early allocation of Rx buffers
		 */
		rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;

		err = ice_setup_rx_ring(&rx_rings[i]);
		if (err)
			goto rx_unwind;

		/* allocate Rx buffers */
		err = ice_alloc_rx_bufs(&rx_rings[i],
					ICE_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
		if (err) {
			/* only rings set up in earlier iterations are freed */
			while (i) {
				i--;
				ice_free_rx_ring(&rx_rings[i]);
			}
			kfree(rx_rings);
			err = -ENOMEM;
			goto free_tx;
		}
	}

process_link:
	/* Bring interface down, copy in the new ring info, then restore the
	 * interface. if VSI is up, bring it down and then back up
	 *
	 * NOTE(review): if __ICE_DOWN is already set at this point the newly
	 * allocated rings are neither installed nor freed -- confirm whether
	 * that path is reachable while netif_running() is true.
	 */
	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
		ice_down(vsi);

		if (tx_rings) {
			ice_for_each_txq(vsi, i) {
				ice_free_tx_ring(vsi->tx_rings[i]);
				*vsi->tx_rings[i] = tx_rings[i];
			}
			kfree(tx_rings);
		}

		if (rx_rings) {
			ice_for_each_rxq(vsi, i) {
				ice_free_rx_ring(vsi->rx_rings[i]);
				/* copy the real tail offset */
				rx_rings[i].tail = vsi->rx_rings[i]->tail;
				/* this is to fake out the allocation routine
				 * into thinking it has to realloc everything
				 * but the recycling logic will let us re-use
				 * the buffers allocated above
				 */
				rx_rings[i].next_to_use = 0;
				rx_rings[i].next_to_clean = 0;
				rx_rings[i].next_to_alloc = 0;
				*vsi->rx_rings[i] = rx_rings[i];
			}
			kfree(rx_rings);
		}

		if (xdp_rings) {
			for (i = 0; i < vsi->num_xdp_txq; i++) {
				ice_free_tx_ring(vsi->xdp_rings[i]);
				*vsi->xdp_rings[i] = xdp_rings[i];
			}
			kfree(xdp_rings);
		}

		vsi->num_tx_desc = new_tx_cnt;
		vsi->num_rx_desc = new_rx_cnt;
		ice_up(vsi);
	}
	goto done;

free_tx:
	/* error cleanup if a later allocation failed after getting Tx/XDP */
	if (xdp_rings) {
		for (i = 0; i < vsi->num_xdp_txq; i++)
			ice_free_tx_ring(&xdp_rings[i]);
		kfree(xdp_rings);
	}
	if (tx_rings) {
		ice_for_each_txq(vsi, i)
			ice_free_tx_ring(&tx_rings[i]);
		kfree(tx_rings);
	}

done:
	clear_bit(__ICE_CFG_BUSY, pf->state);
	return err;
}
static int ice_nway_reset(struct net_device *netdev)
{
/* restart autonegotiation */
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_port_info *pi;
enum ice_status status;
pi = vsi->port_info;
/* If VSI state is up, then restart autoneg with link up */
if (!test_bit(__ICE_DOWN, vsi->back->state))
status = ice_aq_set_link_restart_an(pi, true, NULL);
else
status = ice_aq_set_link_restart_an(pi, false, NULL);
if (status) {
netdev_info(netdev, "link restart failed, err %d aq_err %d\n",
status, pi->hw->adminq.sq_last_status);
return -EIO;
}
return 0;
}
/**
 * ice_get_pauseparam - Get Flow Control status
 * @netdev: network interface device structure
 * @pause: ethernet pause (flow control) parameters
 *
 * Get requested flow control status from PHY capability.
 * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which
 * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report
 * the negotiated Rx/Tx pause via lp_advertising.
 *
 * Rx and Tx pause are reported as off when the PHY capabilities cannot be
 * read or when priority flow control is enabled.
 */
static void
ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_port_info *pi = priv->vsi->port_info;
	struct ice_dcbx_cfg *dcbx_cfg = &pi->local_dcbx_cfg;
	struct ice_aqc_get_phy_caps_data *pcaps;
	enum ice_status status;

	/* Initialize pause params */
	pause->rx_pause = 0;
	pause->tx_pause = 0;

	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return;

	/* Get current PHY config */
	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
				     NULL);
	if (!status) {
		if (pcaps->caps & ICE_AQC_PHY_AN_MODE)
			pause->autoneg = AUTONEG_ENABLE;
		else
			pause->autoneg = AUTONEG_DISABLE;

		/* with PFC enabled, LFC stays reported as off */
		if (!dcbx_cfg->pfc.pfcena) {
			if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
				pause->tx_pause = 1;
			if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
				pause->rx_pause = 1;
		}
	}

	kfree(pcaps);
}
/**
 * ice_set_pauseparam - Set Flow Control parameter
 * @netdev: network interface device structure
 * @pause: requested Tx/Rx flow control settings
 *
 * Returns 0 on success, -EOPNOTSUPP if the VSI is not the PF VSI, if the
 * request would change autoneg or if priority flow control is enabled,
 * -ENOMEM or -EIO if the current PHY configuration cannot be read, -EAGAIN
 * if ice_set_fc() reports an admin queue failure, or the result of
 * ice_nway_reset() when the PF is up after the settle delay.
 */
static int
ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	struct ice_port_info *pi = vsi->port_info;
	struct ice_link_status *link = &pi->phy.link_info;
	struct ice_dcbx_cfg *dcbx_cfg = &pi->local_dcbx_cfg;
	struct ice_aqc_get_phy_caps_data *pcaps;
	enum ice_status status;
	u32 cur_autoneg;
	u8 aq_failures;
	bool link_up;
	int err = 0;

	link_up = link->link_info & ICE_AQ_LINK_UP;

	/* Changing the port's flow control is not supported if this isn't the
	 * PF VSI
	 */
	if (vsi->type != ICE_VSI_PF) {
		netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
		return -EOPNOTSUPP;
	}

	/* Get pause param reports the SW configured autoneg setting, so
	 * compare the request against the SW configuration to prevent the
	 * user from using set pause param to change autoneg.
	 */
	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
	if (!pcaps)
		return -ENOMEM;

	/* Get current PHY config */
	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
				     NULL);
	if (status) {
		kfree(pcaps);
		return -EIO;
	}

	if (pcaps->caps & ICE_AQC_PHY_AN_MODE)
		cur_autoneg = AUTONEG_ENABLE;
	else
		cur_autoneg = AUTONEG_DISABLE;

	kfree(pcaps);

	if (pause->autoneg != cur_autoneg) {
		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
		return -EOPNOTSUPP;
	}

	/* PF is up but autoneg has not completed: warn that the change may
	 * not take effect
	 */
	if (!test_bit(__ICE_DOWN, pf->state) &&
	    !(link->an_info & ICE_AQ_AN_COMPLETED))
		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");

	if (dcbx_cfg->pfc.pfcena) {
		netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
		return -EOPNOTSUPP;
	}

	/* map the Rx/Tx pause pair onto the requested flow control mode */
	if (pause->rx_pause)
		pi->fc.req_mode = pause->tx_pause ? ICE_FC_FULL :
						    ICE_FC_RX_PAUSE;
	else
		pi->fc.req_mode = pause->tx_pause ? ICE_FC_TX_PAUSE :
						    ICE_FC_NONE;

	/* Set the FC mode and only restart AN if link is up */
	status = ice_set_fc(pi, &aq_failures, link_up);

	if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n",
			    status, hw->adminq.sq_last_status);
		err = -EAGAIN;
	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n",
			    status, hw->adminq.sq_last_status);
		err = -EAGAIN;
	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
		netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n",
			    status, hw->adminq.sq_last_status);
		err = -EAGAIN;
	}

	if (test_bit(__ICE_DOWN, pf->state))
		return err;

	/* Give it a little more time to try to come back. If still
	 * down, restart autoneg link or reinitialize the interface.
	 */
	msleep(75);
	if (!test_bit(__ICE_DOWN, pf->state))
		return ice_nway_reset(netdev);

	ice_down(vsi);
	ice_up(vsi);

	return err;
}
/**
 * ice_get_rxfh_key_size - get the RSS hash key size
 * @netdev: network interface device structure (unused)
 *
 * Returns ICE_VSIQF_HKEY_ARRAY_SIZE, the size of the RSS hash key (not of the
 * indirection table).
 */
static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
{
return ICE_VSIQF_HKEY_ARRAY_SIZE;
}
/**
 * ice_get_rxfh_indir_size - get the Rx flow hash indirection table size
 * @netdev: network interface device structure
 *
 * Returns the rss_table_size of the VSI backing @netdev.
 */
static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
return np->vsi->rss_table_size;
}
/**
 * ice_get_rxfh - get the Rx flow hash indirection table
 * @netdev: network interface device structure
 * @indir: indirection table
 * @key: hash key
 * @hfunc: hash function
 *
 * Reports ETH_RSS_HASH_TOP through @hfunc when it is given. When @indir is
 * given, the table (and @key) are fetched through ice_get_rss() and every
 * table entry is widened to 32 bits.
 *
 * Returns 0 on success or when @indir is NULL, -EIO if RSS is not enabled or
 * ice_get_rss() fails, -ENOMEM if the temporary table cannot be allocated.
 */
static int
ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_pf *pf = vsi->back;
	int err = 0;
	u8 *lut;
	int idx;

	if (hfunc)
		*hfunc = ETH_RSS_HASH_TOP;

	if (!indir)
		return 0;

	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
		/* RSS not supported return error here */
		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
		return -EIO;
	}

	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!lut)
		return -ENOMEM;

	if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
		err = -EIO;
	} else {
		/* one byte per LUT entry, one u32 per ethtool entry */
		for (idx = 0; idx < vsi->rss_table_size; idx++)
			indir[idx] = lut[idx];
	}

	kfree(lut);
	return err;
}
/**
 * ice_set_rxfh - set the Rx flow hash indirection table
 * @netdev: network interface device structure
 * @indir: indirection table
 * @key: hash key
 * @hfunc: hash function
 *
 * Stores the user key and table in the VSI (allocating the buffers on first
 * use) and programs them through ice_set_rss(). Without @indir the table is
 * filled by ice_fill_rss_lut(). Each @indir entry is truncated to 8 bits;
 * no queue ID validation is done in this function.
 *
 * Returns 0 on success, -EOPNOTSUPP for an unsupported hash function, -EIO
 * if RSS is not enabled or ice_set_rss() fails, -ENOMEM on allocation failure.
 */
static int
ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
	     const u8 hfunc)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_pf *pf = vsi->back;
	struct device *dev = ice_pf_to_dev(pf);
	u8 *seed = NULL;
	int idx;

	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
		return -EOPNOTSUPP;

	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
		/* RSS not supported return error here */
		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
		return -EIO;
	}

	if (key) {
		if (!vsi->rss_hkey_user) {
			vsi->rss_hkey_user =
				devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
					     GFP_KERNEL);
			if (!vsi->rss_hkey_user)
				return -ENOMEM;
		}
		memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
		seed = vsi->rss_hkey_user;
	}

	if (!vsi->rss_lut_user) {
		vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
						 GFP_KERNEL);
		if (!vsi->rss_lut_user)
			return -ENOMEM;
	}

	if (!indir) {
		ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
				 vsi->rss_size);
	} else {
		/* Each 32 bits pointed by 'indir' is stored with a lut entry */
		for (idx = 0; idx < vsi->rss_table_size; idx++)
			vsi->rss_lut_user[idx] = (u8)indir[idx];
	}

	return ice_set_rss(vsi, seed, vsi->rss_lut_user,
			   vsi->rss_table_size) ? -EIO : 0;
}
/**
 * ice_get_max_txq - return the maximum number of Tx queues for a PF
 * @pf: PF structure
 *
 * Returns the smaller of the number of online CPUs and the num_txq value in
 * the function's common capabilities.
 */
static int ice_get_max_txq(struct ice_pf *pf)
{
return min_t(int, num_online_cpus(),
pf->hw.func_caps.common_cap.num_txq);
}
/**
 * ice_get_max_rxq - return the maximum number of Rx queues for a PF
 * @pf: PF structure
 *
 * Returns the smaller of the number of online CPUs and the num_rxq value in
 * the function's common capabilities.
 */
static int ice_get_max_rxq(struct ice_pf *pf)
{
return min_t(int, num_online_cpus(),
pf->hw.func_caps.common_cap.num_rxq);
}
/**
 * ice_get_combined_cnt - return the current number of combined channels
 * @vsi: PF VSI pointer
 *
 * A queue vector counts as combined when it has both an Rx ring and a Tx
 * ring attached.
 *
 * Returns the number of such queue vectors in @vsi.
 */
static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
{
	u32 cnt = 0;
	int v_idx;

	ice_for_each_q_vector(vsi, v_idx) {
		struct ice_q_vector *qv = vsi->q_vectors[v_idx];

		cnt += (qv->rx.ring && qv->tx.ring) ? 1 : 0;
	}

	return cnt;
}
/**
 * ice_get_channels - get the current and max supported channels
 * @dev: network interface device structure
 * @ch: ethtool channel data structure
 *
 * Leaves @ch untouched when the VSI is marked down.
 */
static void
ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
{
	struct ice_netdev_priv *priv = netdev_priv(dev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_pf *pf = vsi->back;

	/* nothing to report for an inactive VSI */
	if (test_bit(__ICE_DOWN, vsi->state))
		return;

	/* maximums: per direction, and what both directions can share */
	ch->max_rx = ice_get_max_rxq(pf);
	ch->max_tx = ice_get_max_txq(pf);
	ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);

	/* current: queues beyond the combined ones are Rx-only or Tx-only */
	ch->combined_count = ice_get_combined_cnt(vsi);
	ch->tx_count = vsi->num_txq - ch->combined_count;
	ch->rx_count = vsi->num_rxq - ch->combined_count;
}
/**
 * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
 * @vsi: VSI to reconfigure RSS LUT on
 * @req_rss_size: requested range of queue numbers for hashing
 *
 * Set the VSI's RSS parameters, configure the RSS LUT based on these.
 * vsi->rss_size becomes 1 when RSS is not enabled on the PF, otherwise
 * @req_rss_size capped at BIT(rss_table_entry_width) from the function's
 * common capabilities.
 *
 * Returns 0 on success, -EINVAL if @req_rss_size is 0, -ENOMEM if the
 * temporary LUT cannot be allocated, -EIO if ice_aq_set_rss_lut() fails.
 */
static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
{
	struct ice_pf *pf = vsi->back;
	enum ice_status status;
	struct device *dev;
	struct ice_hw *hw;
	int err = 0;
	u8 *lut;

	dev = ice_pf_to_dev(pf);
	hw = &pf->hw;

	if (!req_rss_size)
		return -EINVAL;

	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!lut)
		return -ENOMEM;

	/* set RSS LUT parameters */
	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
		vsi->rss_size = 1;
	} else {
		struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;

		vsi->rss_size = min_t(int, req_rss_size,
				      BIT(caps->rss_table_entry_width));
	}

	/* create/set RSS LUT */
	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
				    vsi->rss_table_size);
	if (status) {
		/* report the send queue status, like the other admin queue
		 * error messages in this file
		 */
		dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
			status, hw->adminq.sq_last_status);
		err = -EIO;
	}

	kfree(lut);
	return err;
}
/**
 * ice_set_channels - set the number channels
 * @dev: network interface device structure
 * @ch: ethtool channel data structure
 *
 * Counts in @ch that equal the current configuration are treated as "not
 * specified" and zeroed in place before the request is evaluated.
 *
 * Returns 0 on success, -EOPNOTSUPP in Safe Mode, -EINVAL for a non-zero
 * other_count, a request without at least one Rx and one Tx channel or one
 * above the supported maximum, or the result of ice_vsi_set_dflt_rss_lut()
 * when the default RSS LUT is reprogrammed.
 */
static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
{
	struct ice_netdev_priv *priv = netdev_priv(dev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_pf *pf = vsi->back;
	int new_rx, new_tx;
	u32 cur_combined;

	/* do not support changing channels in Safe Mode */
	if (ice_is_safe_mode(pf)) {
		netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
		return -EOPNOTSUPP;
	}

	/* do not support changing other_count */
	if (ch->other_count)
		return -EINVAL;

	cur_combined = ice_get_combined_cnt(vsi);

	/* these checks are for cases where user didn't specify a particular
	 * value on cmd line but we get non-zero value anyway via
	 * get_channels(); look at ethtool.c in ethtool repository (the user
	 * space part), particularly, do_schannels() routine
	 */
	if (ch->rx_count == vsi->num_rxq - cur_combined)
		ch->rx_count = 0;
	if (ch->tx_count == vsi->num_txq - cur_combined)
		ch->tx_count = 0;
	if (ch->combined_count == cur_combined)
		ch->combined_count = 0;

	if (!ch->combined_count && (!ch->rx_count || !ch->tx_count)) {
		netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
		return -EINVAL;
	}

	new_rx = ch->combined_count + ch->rx_count;
	if (new_rx > ice_get_max_rxq(pf)) {
		netdev_err(dev, "Maximum allowed Rx channels is %d\n",
			   ice_get_max_rxq(pf));
		return -EINVAL;
	}

	new_tx = ch->combined_count + ch->tx_count;
	if (new_tx > ice_get_max_txq(pf)) {
		netdev_err(dev, "Maximum allowed Tx channels is %d\n",
			   ice_get_max_txq(pf));
		return -EINVAL;
	}

	ice_vsi_recfg_qs(vsi, new_rx, new_tx);

	/* keep a user-configured indirection table untouched */
	if (!new_rx || netif_is_rxfh_configured(dev))
		return 0;

	return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
}
/* Selects whether a coalesce helper operates on the Rx or the Tx ring
 * container of a queue vector.
 */
enum ice_container_type {
ICE_RX_CONTAINER,
ICE_TX_CONTAINER,
};
/**
 * ice_get_rc_coalesce - get ITR values for specific ring container
 * @ec: ethtool structure to fill with driver's coalesce settings
 * @c_type: container type, Rx or Tx
 * @rc: ring container that the ITR values will come from
 *
 * Reads the ITR values stored in one ice_ring_container. This is done per
 * container because each q_vector can have 1 or more rings and all of said
 * ring(s) share the same ITR values. For Rx, the interrupt rate limit of the
 * ring's q_vector is reported as rx_coalesce_usecs_high as well.
 *
 * Returns 0 on success, -EINVAL if @rc has no ring or @c_type is invalid.
 */
static int
ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
		    struct ice_ring_container *rc)
{
	struct ice_pf *pf;

	if (!rc->ring)
		return -EINVAL;

	pf = rc->ring->vsi->back;

	if (c_type == ICE_RX_CONTAINER) {
		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
		ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
		ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl;
		return 0;
	}

	if (c_type == ICE_TX_CONTAINER) {
		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
		ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
		return 0;
	}

	dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
	return -EINVAL;
}
/**
* ice_get_q_coalesce - get a queue's ITR/INTRL (coalesce) settings
* @vsi: VSI associated to the queue for getting ITR/INTRL (coalesce) settings
* @ec: coalesce settings to program the device with
* @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
*
* Return 0 on success, and negative under the following conditions:
* 1. Getting Tx or Rx ITR/INTRL (coalesce) settings failed.
* 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
*/
static int
ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
{
if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
&vsi->rx_rings[q_num]->q_vector->rx))
return -EINVAL;
if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
&vsi->tx_rings[q_num]->q_vector->tx))
return -EINVAL;
} else if (q_num < vsi->num_rxq) {
if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
&vsi->rx_rings[q_num]->q_vector->rx))
return -EINVAL;
} else if (q_num < vsi->num_txq) {
if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
&vsi->tx_rings[q_num]->q_vector->tx))
return -EINVAL;
} else {
return -EINVAL;
}
return 0;
}
/**
 * __ice_get_coalesce - get ITR/INTRL values for the device
 * @netdev: pointer to the netdev associated with this query
 * @ec: ethtool structure to fill with driver's coalesce settings
 * @q_num: queue number to get the coalesce settings for
 *
 * If the caller passes in a negative q_num then we return coalesce settings
 * based on queue number 0, else use the actual q_num passed in.
 *
 * Returns 0 on success, -EINVAL if ice_get_q_coalesce() fails.
 */
static int
__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
		   int q_num)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	int queue = (q_num < 0) ? 0 : q_num;

	return ice_get_q_coalesce(priv->vsi, ec, queue) ? -EINVAL : 0;
}
/**
 * ice_get_coalesce - get the device's coalesce settings
 * @netdev: pointer to the netdev associated with this query
 * @ec: ethtool structure to fill with driver's coalesce settings
 *
 * Calls __ice_get_coalesce() with a negative queue number, which makes it
 * report the settings of queue 0.
 */
static int
ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
{
return __ice_get_coalesce(netdev, ec, -1);
}
/**
 * ice_get_per_q_coalesce - get the coalesce settings of one queue
 * @netdev: pointer to the netdev associated with this query
 * @q_num: queue number to get the coalesce settings for
 * @ec: ethtool structure to fill with driver's coalesce settings
 *
 * Forwards to __ice_get_coalesce(), which takes the queue number as an int.
 */
static int
ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
struct ethtool_coalesce *ec)
{
return __ice_get_coalesce(netdev, ec, q_num);
}
/**
* ice_set_rc_coalesce - set ITR values for specific ring container
* @c_type: container type, Rx or Tx
* @ec: ethtool structure from user to update ITR settings
* @rc: ring container that the ITR values will come from
* @vsi: VSI associated to the ring container
*
* Set specific ITR values. This is done per ice_ring_container because each
* q_vector can have 1 or more rings and all of said ring(s) will have the same
* ITR values.
*
* Returns 0 on success, negative otherwise.
*/
static int
ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
struct ice_ring_container *rc, struct ice_vsi *vsi)
{
const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx";
u32 use_adaptive_coalesce, coalesce_usecs;
struct ice_pf *pf = vsi->back;
u16 itr_setting;
if (!rc->ring)
return -EINVAL;
switch (c_type) {
case ICE_RX_CONTAINER:
if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
(ec->rx_coalesce_usecs_high &&
ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
c_type_str, pf->hw.intrl_gran,
ICE_MAX_INTRL);
return -EINVAL;
}
if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
ice: Refactor interrupt tracking Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x entries (sw_irq_tracker) and one for hardware MSI-x vectors (hw_irq_tracker). Generally the sw_irq_tracker has less entries than the hw_irq_tracker because the hw_irq_tracker has entries equal to the max allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non SR-IOV portion of the vectors, kernel granted IRQs). All of the non SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.) take at least one of each type of tracker resource. SR-IOV only grabs entries from the hw_irq_tracker. There are a few issues with this approach that can be seen when doing any kind of device reconfiguration (i.e. ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates an ice_q_vector and associates it to a LAN queue pair it will grab and use one entry from the hw_irq_tracker and one from the sw_irq_tracker. If the indices on these does not match it will cause a Tx timeout, which will cause a reset and then the indices will match up again and traffic will resume. The mismatched indices come from the trackers not being the same size and/or the search_hint in the two trackers not being equal. Another reason for the refactor is the co-existence of features with SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end of the sw_irq_tracker then other features can no longer use this space because the hardware has now given the remaining interrupts to SR-IOV. This patch reworks how we track MSI-x vectors by removing the hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV are determined all at once instead of per VF. This can be done because when creating VFs we know how many are wanted and how many MSI-x vectors each VF needs. This also allows us to start using MSI-x resources from the end of the PF's allowed MSI-x vectors so we are less likely to use entries needed for other features (i.e. 
RDMA, L2 Offload, etc). This patch also reworks the ice_res_tracker structure by removing the search_hint and adding a new member - "end". Instead of having a search_hint we will always search from 0. The new member, "end", will be used to manipulate the end of the ice_res_tracker (specifically sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV. In the normal case, the end of ice_res_tracker will be equal to the ice_res_tracker's num_entries. The sriov_base_vector member was added to the PF structure. It is used to represent the starting MSI-x index of all the needed MSI-x vectors for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may have to take resources from the sw_irq_tracker. This is done by setting the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to calculate the first HW absolute MSI-x index for each VF, which is used to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to determine the MSI-x register index (used for writing to GLINT_DYN_CTL) within the PF's space. Interrupt changes removed any references to hw_base_vector, hw_oicr_idx, and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker variables remain. Change all of these by removing the "sw_" prefix to help avoid confusion with these variables and their use. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-16 17:30:44 +00:00
wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
pf->hw.intrl_gran));
}
use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
coalesce_usecs = ec->rx_coalesce_usecs;
break;
case ICE_TX_CONTAINER:
use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
coalesce_usecs = ec->tx_coalesce_usecs;
break;
default:
dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
c_type);
return -EINVAL;
}
itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
c_type_str, c_type_str);
return -EINVAL;
}
if (coalesce_usecs > ICE_ITR_MAX) {
netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n",
c_type_str, ICE_ITR_MAX);
return -EINVAL;
}
if (use_adaptive_coalesce) {
rc->itr_setting |= ICE_ITR_DYNAMIC;
} else {
/* save the user set usecs */
rc->itr_setting = coalesce_usecs;
/* device ITR granularity is in 2 usec increments */
rc->target_itr = ITR_REG_ALIGN(rc->itr_setting);
}
return 0;
}
/**
* ice_set_q_coalesce - set a queue's ITR/INTRL (coalesce) settings
* @vsi: VSI associated to the queue that need updating
* @ec: coalesce settings to program the device with
* @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
*
* Return 0 on success, and negative under the following conditions:
* 1. Setting Tx or Rx ITR/INTRL (coalesce) settings failed.
* 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
*/
static int
ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
{
if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
&vsi->rx_rings[q_num]->q_vector->rx,
vsi))
return -EINVAL;
if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
&vsi->tx_rings[q_num]->q_vector->tx,
vsi))
return -EINVAL;
} else if (q_num < vsi->num_rxq) {
if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
&vsi->rx_rings[q_num]->q_vector->rx,
vsi))
return -EINVAL;
} else if (q_num < vsi->num_txq) {
if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
&vsi->tx_rings[q_num]->q_vector->tx,
vsi))
return -EINVAL;
} else {
return -EINVAL;
}
return 0;
}
/**
 * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
 * @netdev: netdev used for print
 * @itr_setting: previous user setting
 * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
 * @coalesce_usecs: requested value of [tx|rx]-usecs
 * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
 *
 * Nothing is printed when adaptive coalescing is requested, when the request
 * equals the previous setting, or when the requested value is even.
 */
static void
ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
		       u32 use_adaptive_coalesce, u32 coalesce_usecs,
		       const char *c_type_str)
{
	u16 prev_usecs;

	if (use_adaptive_coalesce)
		return;

	prev_usecs = ITR_TO_REG(itr_setting);
	if (prev_usecs == coalesce_usecs || !(coalesce_usecs % 2))
		return;

	netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
		    c_type_str, coalesce_usecs, c_type_str,
		    ITR_REG_ALIGN(coalesce_usecs));
}
/**
 * __ice_set_coalesce - set ITR/INTRL values for the device
 * @netdev: pointer to the netdev associated with this request
 * @ec: ethtool structure holding the requested coalesce settings
 * @q_num: queue number to set the coalesce settings for
 *
 * If the caller passes in a negative q_num then we set the coalesce settings
 * for all Tx/Rx queues, else use the actual q_num passed in.
 *
 * Returns 0 on success, -EINVAL if ice_set_q_coalesce() fails for a queue.
 */
static int
__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
		   int q_num)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *vsi = priv->vsi;
	struct ice_q_vector *first;
	int v_idx;

	/* single queue request */
	if (q_num >= 0)
		return ice_set_q_coalesce(vsi, ec, q_num) ? -EINVAL : 0;

	/* the odd-usecs hint is based on the first queue vector only */
	first = vsi->q_vectors[0];
	if (first) {
		ice_print_if_odd_usecs(netdev, first->rx.itr_setting,
				       ec->use_adaptive_rx_coalesce,
				       ec->rx_coalesce_usecs, "rx");

		ice_print_if_odd_usecs(netdev, first->tx.itr_setting,
				       ec->use_adaptive_tx_coalesce,
				       ec->tx_coalesce_usecs, "tx");
	}

	ice_for_each_q_vector(vsi, v_idx) {
		/* In some cases if DCB is configured the num_[rx|tx]q
		 * can be less than vsi->num_q_vectors. This check
		 * accounts for that so we don't report a false failure
		 */
		if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
			return 0;

		if (ice_set_q_coalesce(vsi, ec, v_idx))
			return -EINVAL;
	}

	return 0;
}
/**
 * ice_set_coalesce - apply coalesce settings to all queues
 * @netdev: network interface device structure
 * @ec: ethtool structure holding the requested coalesce settings
 *
 * Calls __ice_set_coalesce() with a queue number of -1, which that function
 * treats as a request covering all Tx/Rx queues.
 *
 * Return: the value returned by __ice_set_coalesce().
 */
static int
ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
{
return __ice_set_coalesce(netdev, ec, -1);
}
/**
 * ice_set_per_q_coalesce - apply coalesce settings to one queue
 * @netdev: network interface device structure
 * @q_num: queue number to apply the settings to
 * @ec: ethtool structure holding the requested coalesce settings
 *
 * Forwards the request to __ice_set_coalesce() for the given queue.
 *
 * NOTE(review): @q_num is a u32 here but an int in __ice_set_coalesce(), which
 * treats negative values as "all queues"; presumably callers never pass a
 * value above INT_MAX - confirm.
 *
 * Return: the value returned by __ice_set_coalesce().
 */
static int
ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
struct ethtool_coalesce *ec)
{
return __ice_set_coalesce(netdev, ec, q_num);
}
/* Device addresses passed to ice_aq_sff_eeprom() for module EEPROM reads;
 * the second address is used for SFP offsets at or past
 * ETH_MODULE_SFF_8079_LEN
 */
#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
/* Values of the byte at offset 0 that identify the module type */
#define ICE_MODULE_TYPE_SFP 0x03
#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D
#define ICE_MODULE_TYPE_QSFP28 0x11
/* Bit masks tested against the byte read from ICE_MODULE_SFF_8472_SWAP */
#define ICE_MODULE_SFF_ADDR_MODE 0x04
#define ICE_MODULE_SFF_DIAG_CAPAB 0x40
/* Offset of the revision byte read for QSFP+/QSFP28 modules */
#define ICE_MODULE_REVISION_ADDR 0x01
/* Offsets of the two bytes read for SFP modules to choose between the
 * SFF-8079 and SFF-8472 layouts
 */
#define ICE_MODULE_SFF_8472_COMP 0x5E
#define ICE_MODULE_SFF_8472_SWAP 0x5C
/* EEPROM length reported for QSFP+/QSFP28 modules */
#define ICE_MODULE_QSFP_MAX_LEN 640
/**
 * ice_get_module_info - get SFF module type and revision information
 * @netdev: network interface device structure
 * @modinfo: module EEPROM size and layout information structure
 *
 * Reads the identifier byte of the plugged module and, depending on the
 * module family, the additional bytes needed to pick the EEPROM layout that
 * is reported through @modinfo.
 *
 * Return: 0 on success, -EIO if a module read fails, -EINVAL if the
 * identifier byte is not a recognized module type.
 */
static int
ice_get_module_info(struct net_device *netdev,
		    struct ethtool_modinfo *modinfo)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_hw *hw = &np->vsi->back->hw;
	u8 comp_byte = 0;
	u8 swap_byte = 0;
	u8 rev_byte = 0;
	u8 id_byte = 0;

	if (ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
			      0, &id_byte, 1, 0, NULL))
		return -EIO;

	if (id_byte == ICE_MODULE_TYPE_SFP) {
		if (ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
				      ICE_MODULE_SFF_8472_COMP, 0x00, 0,
				      &comp_byte, 1, 0, NULL))
			return -EIO;

		if (ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
				      ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
				      &swap_byte, 1, 0, NULL))
			return -EIO;

		/* SFF-8472 is reported only when the address mode bit is
		 * clear, the compliance byte is non-zero and the diagnostic
		 * capability bit is set; everything else is SFF-8079
		 */
		if (!(swap_byte & ICE_MODULE_SFF_ADDR_MODE) && comp_byte &&
		    (swap_byte & ICE_MODULE_SFF_DIAG_CAPAB)) {
			modinfo->type = ETH_MODULE_SFF_8472;
			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
		} else {
			modinfo->type = ETH_MODULE_SFF_8079;
			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
		}

		return 0;
	}

	if (id_byte == ICE_MODULE_TYPE_QSFP_PLUS ||
	    id_byte == ICE_MODULE_TYPE_QSFP28) {
		if (ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
				      ICE_MODULE_REVISION_ADDR, 0x00, 0,
				      &rev_byte, 1, 0, NULL))
			return -EIO;

		/* Revisions above 0x02 are reported as SFF-8636, older ones
		 * as SFF-8436; the reported length is the same for both
		 */
		if (rev_byte > 0x02)
			modinfo->type = ETH_MODULE_SFF_8636;
		else
			modinfo->type = ETH_MODULE_SFF_8436;
		modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;

		return 0;
	}

	netdev_warn(netdev, "SFF Module Type not recognized.\n");
	return -EINVAL;
}
/**
 * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
 * @netdev: network interface device structure
 * @ee: EEPROM dump request structure
 * @data: buffer to be filled with EEPROM contents
 *
 * Copies @ee->len bytes starting at linear offset @ee->offset into @data,
 * one byte per ice_aq_sff_eeprom() call. For SFP modules, offsets at or past
 * ETH_MODULE_SFF_8079_LEN are read from the second device address. For all
 * other modules, offsets at or past ETH_MODULE_SFF_8436_LEN are translated
 * into a page number plus an offset within that page.
 *
 * A byte that cannot be read is reported as 0 so the rest of the dump is
 * still returned.
 *
 * Return: 0 on success, -EINVAL for an empty or missing request/buffer,
 * -EIO if the module identifier byte cannot be read.
 */
static int
ice_get_module_eeprom(struct net_device *netdev,
		      struct ethtool_eeprom *ee, u8 *data)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = np->vsi;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	enum ice_status status;
	bool is_sfp = false;
	u8 value = 0;
	u32 i;

	/* reject a bad request before touching the hardware */
	if (!ee || !ee->len || !data)
		return -EINVAL;

	/* the identifier byte decides how offsets are translated below */
	status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0, 0, 0,
				   &value, 1, 0, NULL);
	if (status)
		return -EIO;

	if (value == ICE_MODULE_TYPE_SFP)
		is_sfp = true;

	for (i = 0; i < ee->len; i++) {
		/* Each byte is translated from its own linear offset, so the
		 * device address and page start fresh on every pass. Carrying
		 * the page over from the previous byte would advance it once
		 * per byte instead of once per ETH_MODULE_SFF_8436_LEN / 2
		 * bytes.
		 */
		u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
		u16 offset = i + ee->offset;
		u8 page = 0;

		/* Check if we need to access the other memory page */
		if (is_sfp) {
			if (offset >= ETH_MODULE_SFF_8079_LEN) {
				offset -= ETH_MODULE_SFF_8079_LEN;
				addr = ICE_I2C_EEPROM_DEV_ADDR2;
			}
		} else {
			while (offset >= ETH_MODULE_SFF_8436_LEN) {
				/* Compute memory page number and offset. */
				offset -= ETH_MODULE_SFF_8436_LEN / 2;
				page++;
			}
		}

		status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
					   &value, 1, 0, NULL);
		/* best effort: an unreadable byte is reported as zero */
		if (status)
			value = 0;
		data[i] = value;
	}

	return 0;
}
/* ethtool operations table installed on a netdev by ice_set_ethtool_ops().
 * It includes the coalesce, RSS, pause, FEC and SFF module callbacks that
 * are not present in ice_ethtool_safe_mode_ops.
 */
static const struct ethtool_ops ice_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE |
ETHTOOL_COALESCE_RX_USECS_HIGH,
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
.get_drvinfo = ice_get_drvinfo,
.get_regs_len = ice_get_regs_len,
.get_regs = ice_get_regs,
.get_msglevel = ice_get_msglevel,
.set_msglevel = ice_set_msglevel,
.self_test = ice_self_test,
.get_link = ethtool_op_get_link,
.get_eeprom_len = ice_get_eeprom_len,
.get_eeprom = ice_get_eeprom,
.get_coalesce = ice_get_coalesce,
.set_coalesce = ice_set_coalesce,
.get_strings = ice_get_strings,
.set_phys_id = ice_set_phys_id,
.get_ethtool_stats = ice_get_ethtool_stats,
.get_priv_flags = ice_get_priv_flags,
.set_priv_flags = ice_set_priv_flags,
.get_sset_count = ice_get_sset_count,
.get_rxnfc = ice_get_rxnfc,
.set_rxnfc = ice_set_rxnfc,
.get_ringparam = ice_get_ringparam,
.set_ringparam = ice_set_ringparam,
.nway_reset = ice_nway_reset,
.get_pauseparam = ice_get_pauseparam,
.set_pauseparam = ice_set_pauseparam,
.get_rxfh_key_size = ice_get_rxfh_key_size,
.get_rxfh_indir_size = ice_get_rxfh_indir_size,
.get_rxfh = ice_get_rxfh,
.set_rxfh = ice_set_rxfh,
.get_channels = ice_get_channels,
.set_channels = ice_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_per_queue_coalesce = ice_get_per_q_coalesce,
.set_per_queue_coalesce = ice_set_per_q_coalesce,
.get_fecparam = ice_get_fecparam,
.set_fecparam = ice_set_fecparam,
.get_module_info = ice_get_module_info,
.get_module_eeprom = ice_get_module_eeprom,
};
/* Reduced ethtool operations table installed on a netdev by
 * ice_set_ethtool_safe_mode_ops(). Every callback listed here also appears
 * in ice_ethtool_ops.
 */
static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
.get_drvinfo = ice_get_drvinfo,
.get_regs_len = ice_get_regs_len,
.get_regs = ice_get_regs,
.get_msglevel = ice_get_msglevel,
.set_msglevel = ice_set_msglevel,
.get_eeprom_len = ice_get_eeprom_len,
.get_eeprom = ice_get_eeprom,
.get_strings = ice_get_strings,
.get_ethtool_stats = ice_get_ethtool_stats,
.get_sset_count = ice_get_sset_count,
.get_ringparam = ice_get_ringparam,
.set_ringparam = ice_set_ringparam,
.nway_reset = ice_nway_reset,
.get_channels = ice_get_channels,
};
/**
 * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops
 * @netdev: network interface device structure
 *
 * Points @netdev->ethtool_ops at the reduced ice_ethtool_safe_mode_ops table.
 */
void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
{
netdev->ethtool_ops = &ice_ethtool_safe_mode_ops;
}
/**
 * ice_set_ethtool_ops - setup netdev ethtool ops
 * @netdev: network interface device structure
 *
 * Points @netdev->ethtool_ops at the full ice_ethtool_ops table.
 */
void ice_set_ethtool_ops(struct net_device *netdev)
{
netdev->ethtool_ops = &ice_ethtool_ops;
}