From 9c86336c15db1c48cbaddff56caf2be0a930e991 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 20 Aug 2018 10:51:05 +0800 Subject: [PATCH 01/77] ip6_vti: fix a null pointer deference when destroy vti6 tunnel If load ip6_vti module and create a network namespace when set fb_tunnels_only_for_init_net to 1, then exit the namespace will cause following crash: [ 6601.677036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 6601.679057] PGD 8000000425eca067 P4D 8000000425eca067 PUD 424292067 PMD 0 [ 6601.680483] Oops: 0000 [#1] SMP PTI [ 6601.681223] CPU: 7 PID: 93 Comm: kworker/u16:1 Kdump: loaded Tainted: G E 4.18.0+ #3 [ 6601.683153] Hardware name: Fedora Project OpenStack Nova, BIOS seabios-1.7.5-11.el7 04/01/2014 [ 6601.684919] Workqueue: netns cleanup_net [ 6601.685742] RIP: 0010:vti6_exit_batch_net+0x87/0xd0 [ip6_vti] [ 6601.686932] Code: 7b 08 48 89 e6 e8 b9 ea d3 dd 48 8b 1b 48 85 db 75 ec 48 83 c5 08 48 81 fd 00 01 00 00 75 d5 49 8b 84 24 08 01 00 00 48 89 e6 <48> 8b 78 08 e8 90 ea d3 dd 49 8b 45 28 49 39 c6 4c 8d 68 d8 75 a1 [ 6601.690735] RSP: 0018:ffffa897c2737de0 EFLAGS: 00010246 [ 6601.691846] RAX: 0000000000000000 RBX: 0000000000000000 RCX: dead000000000200 [ 6601.693324] RDX: 0000000000000015 RSI: ffffa897c2737de0 RDI: ffffffff9f2ea9e0 [ 6601.694824] RBP: 0000000000000100 R08: 0000000000000000 R09: 0000000000000000 [ 6601.696314] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8dc323c07e00 [ 6601.697812] R13: ffff8dc324a63100 R14: ffffa897c2737e30 R15: ffffa897c2737e30 [ 6601.699345] FS: 0000000000000000(0000) GS:ffff8dc33fdc0000(0000) knlGS:0000000000000000 [ 6601.701068] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6601.702282] CR2: 0000000000000008 CR3: 0000000424966002 CR4: 00000000001606e0 [ 6601.703791] Call Trace: [ 6601.704329] cleanup_net+0x1b4/0x2c0 [ 6601.705268] process_one_work+0x16c/0x370 [ 6601.706145] worker_thread+0x49/0x3e0 [ 6601.706942] kthread+0xf8/0x130 [ 6601.707626] ? rescuer_thread+0x340/0x340 [ 6601.708476] ? kthread_bind+0x10/0x10 [ 6601.709266] ret_from_fork+0x35/0x40 Reproduce: modprobe ip6_vti echo 1 > /proc/sys/net/core/fb_tunnels_only_for_init_net unshare -n exit This because ip6n->tnls_wc[0] point to fallback device in default, but in non-default namespace, ip6n->tnls_wc[0] will be NULL, so add the NULL check comparatively. Fixes: e2948e5af8ee ("ip6_vti: fix creating fallback tunnel device for vti6") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 38dec9da90d3..5095367c7204 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1094,7 +1094,8 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, list); + if (t) + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) From f00d25f3154b676fcea4502a25b94bd7f142ca74 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:42 +0300 Subject: [PATCH 02/77] qed: Wait for ready indication before rereading the shmem The MFW might be reset and re-update its shared memory. Upon the detection of such a reset the driver rereads this memory, but it has to wait till the data is valid. This patch adds the missing wait for a data ready indication. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 50 +++++++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index d89a0e22f6e4..bdcacb31d88b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } +/* Maximum of 1 sec to wait for the SHMEM ready indication */ +#define QED_MCP_SHMEM_RDY_MAX_RETRIES 20 +#define QED_MCP_SHMEM_RDY_ITER_MS 50 + static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; + u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES; + u8 msec = QED_MCP_SHMEM_RDY_ITER_MS; u32 drv_mb_offsize, mfw_mb_offsize; u32 mcp_pf_id = MCP_PF_ID(p_hwfn); p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR); - if (!p_info->public_base) - return 0; + if (!p_info->public_base) { + DP_NOTICE(p_hwfn, + "The address of the MCP scratch-pad is not configured\n"); + return -EINVAL; + } p_info->public_base |= GRCBASE_MCP; + /* Get the MFW MB address and number of supported messages */ + mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, + SECTION_OFFSIZE_ADDR(p_info->public_base, + PUBLIC_MFW_MB)); + p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); + p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, + sup_msgs)); + + /* The driver can notify that there was an MCP reset, and might read the + * SHMEM values before the MFW has completed initializing them. + * To avoid this, the "sup_msgs" field in the MFW mailbox is used as a + * data ready indication. + */ + while (!p_info->mfw_mb_length && --cnt) { + msleep(msec); + p_info->mfw_mb_length = + (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, sup_msgs)); + } + + if (!cnt) { + DP_NOTICE(p_hwfn, + "Failed to get the SHMEM ready notification after %d msec\n", + QED_MCP_SHMEM_RDY_MAX_RETRIES * msec); + return -EBUSY; + } + /* Calculate the driver and MFW mailbox address */ drv_mb_offsize = qed_rd(p_hwfn, p_ptt, SECTION_OFFSIZE_ADDR(p_info->public_base, @@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n", drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id); - /* Set the MFW MB address */ - mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, - SECTION_OFFSIZE_ADDR(p_info->public_base, - PUBLIC_MFW_MB)); - p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); - p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr); - /* Get the current driver mailbox sequence before sending * the first command */ From 76271809f49056f079e202bf6513d17b0d6dd34d Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:43 +0300 Subject: [PATCH 03/77] qed: Wait for MCP halt and resume commands to take place Successive iterations of halting and resuming the management chip (MCP) might fail, since currently the driver doesn't wait for these operations to actually take place. This patch prevents the driver from moving forward before the operations are reflected in the state register. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 46 +++++++++++++++---- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bdcacb31d88b..5f3dbdc7ff1d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -2109,31 +2109,61 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } +/* A maximal 100 msec waiting time for the MCP to halt */ +#define QED_MCP_HALT_SLEEP_MS 10 +#define QED_MCP_HALT_MAX_RETRIES 10 + int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 resp = 0, param = 0; + u32 resp = 0, param = 0, cpu_state, cnt = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, ¶m); - if (rc) + if (rc) { DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + return rc; + } - return rc; + do { + msleep(QED_MCP_HALT_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) + break; + } while (++cnt < QED_MCP_HALT_MAX_RETRIES); + + if (cnt == QED_MCP_HALT_MAX_RETRIES) { + DP_NOTICE(p_hwfn, + "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state); + return -EBUSY; + } + + return 0; } +#define QED_MCP_RESUME_SLEEP_MS 10 + int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 value, cpu_mode; + u32 cpu_mode, cpu_state; qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); - value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); - value &= ~MCP_REG_CPU_MODE_SOFT_HALT; - qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode); + msleep(QED_MCP_RESUME_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); - return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) { + DP_NOTICE(p_hwfn, + "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + cpu_mode, cpu_state); + return -EBUSY; + } + + return 0; } int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index d8ad2dcad8d5..2279965f8f8a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -562,6 +562,7 @@ 0 #define MCP_REG_CPU_STATE \ 0xe05004UL +#define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL #define PGLUE_B_REG_PF_BAR0_SIZE \ From eaa50fc59e5841910987e90b0438b2643041f508 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:44 +0300 Subject: [PATCH 04/77] qed: Prevent a possible deadlock during driver load and unload The MFW manages an internal lock to prevent concurrent hardware (de)initialization of different PFs. This, together with the busy-waiting for the MFW's responses for commands, might lead to a deadlock during concurrent load or unload of PFs. This patch adds the option to sleep within the busy-waiting, and uses it for the (un)load requests (which are not sent from an interrupt context) to prevent the possible deadlock. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 43 ++++++++++++++++------- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 21 ++++++----- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 5f3dbdc7ff1d..b7279e625db3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -48,7 +48,7 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define CHIP_MCP_RESP_ITER_US 10 +#define QED_MCP_RESP_ITER_US 10 #define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */ #define QED_MCP_RESET_RETRIES (50 * 1000) /* Account for 500 msec */ @@ -317,7 +317,7 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0; + u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; /* Ensure that only a single thread is accessing the mailbox */ @@ -449,10 +449,10 @@ static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_mb_params *p_mb_params, - u32 max_retries, u32 delay) + u32 max_retries, u32 usecs) { + u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000); struct qed_mcp_cmd_elem *p_cmd_elem; - u32 cnt = 0; u16 seq_num; int rc = 0; @@ -475,7 +475,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, goto err; spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); - udelay(delay); + + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); } while (++cnt < max_retries); if (cnt >= max_retries) { @@ -504,7 +508,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, * The spinlock stays locked until the list element is removed. */ - udelay(delay); + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); if (p_cmd_elem->b_is_completed) @@ -539,7 +547,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n", p_mb_params->mcp_resp, p_mb_params->mcp_param, - (cnt * delay) / 1000, (cnt * delay) % 1000); + (cnt * usecs) / 1000, (cnt * usecs) % 1000); /* Clear the sequence number from the MFW response */ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK; @@ -557,7 +565,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, { size_t union_data_size = sizeof(union drv_union_data); u32 max_retries = QED_DRV_MB_MAX_RETRIES; - u32 delay = CHIP_MCP_RESP_ITER_US; + u32 usecs = QED_MCP_RESP_ITER_US; /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { @@ -574,8 +582,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) { + max_retries = DIV_ROUND_UP(max_retries, 1000); + usecs *= 1000; + } + return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries, - delay); + usecs); } int qed_mcp_cmd(struct qed_hwfn *p_hwfn, @@ -793,6 +806,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -1014,7 +1028,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 wol_param, mcp_resp, mcp_param; + struct qed_mcp_mb_params mb_params; + u32 wol_param; switch (p_hwfn->cdev->wol_config) { case QED_OV_WOL_DISABLED: @@ -1032,8 +1047,12 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; } - return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param, - &mcp_resp, &mcp_param); + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; + mb_params.param = wol_param; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + + return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 047976d5c6e9..b9d3ecf7aad6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -660,14 +660,19 @@ struct qed_mcp_info { }; struct qed_mcp_mb_params { - u32 cmd; - u32 param; - void *p_data_src; - u8 data_src_size; - void *p_data_dst; - u8 data_dst_size; - u32 mcp_resp; - u32 mcp_param; + u32 cmd; + u32 param; + void *p_data_src; + void *p_data_dst; + u8 data_src_size; + u8 data_dst_size; + u32 mcp_resp; + u32 mcp_param; + u32 flags; +#define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAGS_IS_SET(params, flag) \ + ({ typeof(params) __params = (params); \ + (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) }; struct qed_drv_tlv_hdr { From b310974e041913231b6e3d5d475d4df55c312301 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:45 +0300 Subject: [PATCH 05/77] qed: Avoid sending mailbox commands when MFW is not responsive Keep sending mailbox commands to the MFW when it is not responsive ends up with a redundant amount of timeout expiries. This patch prints the MCP status on the first command which is not responded, and blocks the following commands. Since the (un)load request commands might be not responded due to other PFs, the patch also adds the option to skip the blocking upon a failure. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 52 ++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 6 ++- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b7279e625db3..5d37ec7e9b0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -320,6 +320,12 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n"); + return -EBUSY; + } + /* Ensure that only a single thread is accessing the mailbox */ spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -445,6 +451,33 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, (p_mb_params->cmd | seq_num), p_mb_params->param); } +static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd) +{ + p_hwfn->mcp_info->b_block_cmd = block_cmd; + + DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n", + block_cmd ? "Block" : "Unblock"); +} + +static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2; + u32 delay = QED_MCP_RESP_ITER_US; + + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + + DP_NOTICE(p_hwfn, + "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n", + cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2); +} + static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -531,11 +564,15 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "The MFW failed to respond to command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); + qed_mcp_print_cpu_info(p_hwfn, p_ptt); spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) + qed_mcp_cmd_set_blocking(p_hwfn, true); + return -EAGAIN; } @@ -573,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EBUSY; } + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n", + p_mb_params->cmd, p_mb_params->param); + return -EBUSY; + } + if (p_mb_params->data_src_size > union_data_size || p_mb_params->data_dst_size > union_data_size) { DP_ERR(p_hwfn, @@ -806,7 +850,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -1050,7 +1094,7 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) memset(&mb_params, 0, sizeof(mb_params)); mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; mb_params.param = wol_param; - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } @@ -2158,6 +2202,8 @@ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, true); + return 0; } @@ -2182,6 +2228,8 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, false); + return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index b9d3ecf7aad6..85e6b3989e7a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -635,11 +635,14 @@ struct qed_mcp_info { */ spinlock_t cmd_lock; + /* Flag to indicate whether sending a MFW mailbox command is blocked */ + bool b_block_cmd; + /* Spinlock used for syncing SW link-changes and link-changes * originating from attention context. */ spinlock_t link_lock; - bool block_mb_sending; + u32 public_base; u32 drv_mb_addr; u32 mfw_mb_addr; @@ -670,6 +673,7 @@ struct qed_mcp_mb_params { u32 mcp_param; u32 flags; #define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAG_AVOID_BLOCK (0x1 << 1) #define QED_MB_FLAGS_IS_SET(params, flag) \ ({ typeof(params) __params = (params); \ (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 2279965f8f8a..f736f70956fd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -565,6 +565,7 @@ #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL +#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL #define PGLUE_B_REG_PF_BAR0_SIZE \ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ From 176eb614b118c96e7797f5ddefd10708c316f621 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 20 Aug 2018 12:43:51 +0800 Subject: [PATCH 06/77] r8152: disable RX aggregation on new Dell TB16 dock There's a new Dell TB16 dock with a different iSerialNumber. Apply the same fix from commit 0b1655143df0 ("r8152: disable RX aggregation on Dell TB16 dock") to this model. BugLink: https://bugs.launchpad.net/bugs/1785780 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 97742708460b..2cd71bdb6484 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5217,8 +5217,8 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && - udev->serial && !strcmp(udev->serial, "000001000000")) { + if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && + (!strcmp(udev->serial, "000001000000") || !strcmp(udev->serial, "000002000000"))) { dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); } From 80f1a0f4e0cd4bfc8a74fc1c39843a6e7b206b95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Aug 2018 13:02:41 -0700 Subject: [PATCH 07/77] net/ipv6: Put lwtstate when destroying fib6_info Prior to the introduction of fib6_info lwtstate was managed by the dst code. With fib6_info releasing lwtstate needs to be done when the struct is freed. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d212738e9d10..c861a6d4671d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -198,6 +198,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) } } + lwtstate_put(f6i->fib6_nh.nh_lwtstate); + if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); From ab08dcd724543896303eae7de6288242bbaff458 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 01:41:56 +0000 Subject: [PATCH 08/77] rhashtable: remove duplicated include from rhashtable.c Remove duplicated include. Signed-off-by: Yue Haibing Signed-off-by: David S. Miller --- lib/rhashtable.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ae4223e0f5bc..672eecda874a 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,7 +28,6 @@ #include #include #include -#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U From 51474eff2bc2777061ab3658e014a37dc9d7a775 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Aug 2018 11:57:07 +0200 Subject: [PATCH 09/77] Bluetooth: Make BT_HCIUART_RTL configuration option depend on ACPI At the moment we only support ACPI enumeration for serial port attached RTL bluetooth controllers. This commit adds a dependency on ACPI to the BT_HCIUART_RTL configuration option, fixing the following warning when ACPI is not enabled: drivers/bluetooth/hci_h5.c:920:22: warning: 'rtl_vnd' defined but not used Cc: Arnd Bergmann Reported-by: Arnd Bergmann Signed-off-by: Hans de Goede Acked-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Signed-off-by: Marcel Holtmann --- drivers/bluetooth/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 2df11cc08a46..845b0314ce3a 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -200,6 +200,7 @@ config BT_HCIUART_RTL depends on BT_HCIUART depends on BT_HCIUART_SERDEV depends on GPIOLIB + depends on ACPI select BT_HCIUART_3WIRE select BT_RTL help From addb3ffbca66954fb1d1791d2db2153c403f81af Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 14 Aug 2018 10:10:31 -0500 Subject: [PATCH 10/77] Bluetooth: mediatek: Fix memory leak In case memory resources for *fw* were allocated, release them before return. Addresses-Coverity-ID: 1472611 ("Resource leak") Fixes: 7237c4c9ec92 ("Bluetooth: mediatek: Add protocol support for MediaTek serial devices") Signed-off-by: Gustavo A. R. Silva Acked-by: Sean Wang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btmtkuart.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index ed2a5c7cb77f..4593baff2bc9 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -144,8 +144,10 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_size = fw->size; /* The size of patch header is 30 bytes, should be skip */ - if (fw_size < 30) - return -EINVAL; + if (fw_size < 30) { + err = -EINVAL; + goto free_fw; + } fw_size -= 30; fw_ptr += 30; @@ -172,8 +174,8 @@ static int mtk_setup_fw(struct hci_dev *hdev) fw_ptr += dlen; } +free_fw: release_firmware(fw); - return err; } From 093dee661d6004738c4cbcbf48835c1e6c6ebae3 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 13:58:08 +0000 Subject: [PATCH 11/77] sch_cake: Remove unused including Remove including that don't need it. Signed-off-by: Yue Haibing Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 35fc7252187c..4d26b0823cdf 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include #include From c27f1e2e9f29563cb093e96261e87c1ef83aeb98 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 14:05:42 +0000 Subject: [PATCH 12/77] rds: tcp: remove duplicated include from tcp.c Remove duplicated include. Signed-off-by: Yue Haibing Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2c7b7c352d3e..b9bbcf3d6c63 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "rds.h" From b93c1b5ac8643cc08bb74fa8ae21d6c63dfcb23d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 21 Aug 2018 10:40:38 -0700 Subject: [PATCH 13/77] hv_netvsc: ignore devices that are not PCI Registering another device with same MAC address (such as TAP, VPN or DPDK KNI) will confuse the VF autobinding logic. Restrict the search to only run if the device is known to be a PCI attached VF. Fixes: e8ff40d4bff1 ("hv_netvsc: improve VF device matching") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 507f68190cb1..1121a1ec407c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -2039,12 +2040,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct net_device_context *net_device_ctx; + struct device *pdev = vf_netdev->dev.parent; struct netvsc_device *netvsc_dev; int ret; if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; + if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching From edfaf94fa705181eeb2fe0c36c0b902dedbd40f1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:05 -0700 Subject: [PATCH 14/77] net_sched: improve and refactor tcf_action_put_many() tcf_action_put_many() is mostly called to clean up actions on failure path, but tcf_action_put_many(&actions[acts_deleted]) is used in the ugliest way: it passes a slice of the array and uses an additional NULL at the end to avoid out-of-bound access. acts_deleted is completely unnecessary since we can teach tcf_action_put_many() scan the whole array and checks against NULL pointer. Which also means tcf_action_delete() should set deleted action pointers to NULL to avoid double free. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 229d63c99be2..cd69a6afcf88 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -686,14 +686,18 @@ static int tcf_action_put(struct tc_action *p) return __tcf_action_put(p, false); } +/* Put all actions in this array, skip those NULL's. */ static void tcf_action_put_many(struct tc_action *actions[]) { int i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { struct tc_action *a = actions[i]; - const struct tc_action_ops *ops = a->ops; + const struct tc_action_ops *ops; + if (!a) + continue; + ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } @@ -1176,7 +1180,7 @@ err_out: } static int tcf_action_delete(struct net *net, struct tc_action *actions[], - int *acts_deleted, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { u32 act_index; int ret, i; @@ -1196,20 +1200,17 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[], } else { /* now do the delete */ ret = ops->delete(net, act_index); - if (ret < 0) { - *acts_deleted = i + 1; + if (ret < 0) return ret; - } } + actions[i] = NULL; } - *acts_deleted = i; return 0; } static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - int *acts_deleted, u32 portid, size_t attr_size, - struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; @@ -1227,7 +1228,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, acts_deleted, extack); + ret = tcf_action_delete(net, actions, extack); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); @@ -1249,8 +1250,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t attr_size = 0; - struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {}; - int acts_deleted = 0; + struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) @@ -1280,14 +1280,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, actions, &acts_deleted, portid, - attr_size, extack); + ret = tcf_del_notify(net, n, actions, portid, attr_size, extack); if (ret) goto err; - return ret; + return 0; } err: - tcf_action_put_many(&actions[acts_deleted]); + tcf_action_put_many(actions); return ret; } From 97a3f84f2c84f81b859aedd2c186df09c2ee21a6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:06 -0700 Subject: [PATCH 15/77] net_sched: remove unnecessary ops->delete() All ops->delete() wants is getting the tn->idrinfo, but we already have tc_action before calling ops->delete(), and tc_action has a pointer ->idrinfo. More importantly, each type of action does the same thing, that is, just calling tcf_idr_delete_index(). So it can be just removed. Fixes: b409074e6693 ("net: sched: add 'delete' function to action ops") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 15 +++++++-------- net/sched/act_bpf.c | 8 -------- net/sched/act_connmark.c | 8 -------- net/sched/act_csum.c | 8 -------- net/sched/act_gact.c | 8 -------- net/sched/act_ife.c | 8 -------- net/sched/act_ipt.c | 16 ---------------- net/sched/act_mirred.c | 8 -------- net/sched/act_nat.c | 8 -------- net/sched/act_pedit.c | 8 -------- net/sched/act_police.c | 8 -------- net/sched/act_sample.c | 8 -------- net/sched/act_simple.c | 8 -------- net/sched/act_skbedit.c | 8 -------- net/sched/act_skbmod.c | 8 -------- net/sched/act_tunnel_key.c | 8 -------- net/sched/act_vlan.c | 8 -------- 18 files changed, 7 insertions(+), 146 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 1ad5b19e83a9..e32708491d83 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -102,7 +102,6 @@ struct tc_action_ops { size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); - int (*delete)(struct net *net, u32 index); }; struct tc_action_net { @@ -158,7 +157,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index); int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); static inline int tcf_idr_release(struct tc_action *a, bool bind) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cd69a6afcf88..00bf7d2b0bdd 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -337,9 +337,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } EXPORT_SYMBOL(tcf_idr_check); -int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) +static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { - struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret = 0; @@ -370,7 +369,6 @@ int tcf_idr_delete_index(struct tc_action_net *tn, u32 index) spin_unlock(&idrinfo->lock); return ret; } -EXPORT_SYMBOL(tcf_idr_delete_index); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, @@ -1182,24 +1180,25 @@ err_out: static int tcf_action_delete(struct net *net, struct tc_action *actions[], struct netlink_ext_ack *extack) { - u32 act_index; - int ret, i; + int i; for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { struct tc_action *a = actions[i]; const struct tc_action_ops *ops = a->ops; - /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. */ - act_index = a->tcfa_index; + struct tcf_idrinfo *idrinfo = a->idrinfo; + u32 act_index = a->tcfa_index; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); } else { + int ret; + /* now do the delete */ - ret = ops->delete(net, act_index); + ret = tcf_idr_delete_index(idrinfo, act_index); if (ret < 0) return ret; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index d30b23e42436..0c68bc9cf0b4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -395,13 +395,6 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_bpf_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .type = TCA_ACT_BPF, @@ -412,7 +405,6 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .init = tcf_bpf_init, .walk = tcf_bpf_walker, .lookup = tcf_bpf_search, - .delete = tcf_bpf_delete, .size = sizeof(struct tcf_bpf), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 54c0bf54f2ac..6f0f273f1139 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -198,13 +198,6 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_connmark_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .type = TCA_ACT_CONNMARK, @@ -214,7 +207,6 @@ static struct tc_action_ops act_connmark_ops = { .init = tcf_connmark_init, .walk = tcf_connmark_walker, .lookup = tcf_connmark_search, - .delete = tcf_connmark_delete, .size = sizeof(struct tcf_connmark_info), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e698d3fe2080..b8a67ae3105a 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -659,13 +659,6 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_csum)); } -static int tcf_csum_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, @@ -677,7 +670,6 @@ static struct tc_action_ops act_csum_ops = { .walk = tcf_csum_walker, .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, - .delete = tcf_csum_delete, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 6a3f25a8ffb3..cd1d9bd32ef9 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -243,13 +243,6 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) return sz; } -static int tcf_gact_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -261,7 +254,6 @@ static struct tc_action_ops act_gact_ops = { .walk = tcf_gact_walker, .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, - .delete = tcf_gact_delete, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index d1081bdf1bdb..92fcf8ba5bca 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -853,13 +853,6 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ife_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ife_ops = { .kind = "ife", .type = TCA_ACT_IFE, @@ -870,7 +863,6 @@ static struct tc_action_ops act_ife_ops = { .init = tcf_ife_init, .walk = tcf_ife_walker, .lookup = tcf_ife_search, - .delete = tcf_ife_delete, .size = sizeof(struct tcf_ife_info), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 51f235bbeb5b..23273b5303fd 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -337,13 +337,6 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_ipt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .type = TCA_ACT_IPT, @@ -354,7 +347,6 @@ static struct tc_action_ops act_ipt_ops = { .init = tcf_ipt_init, .walk = tcf_ipt_walker, .lookup = tcf_ipt_search, - .delete = tcf_ipt_delete, .size = sizeof(struct tcf_ipt), }; @@ -395,13 +387,6 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_xt_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_xt_ops = { .kind = "xt", .type = TCA_ACT_XT, @@ -412,7 +397,6 @@ static struct tc_action_ops act_xt_ops = { .init = tcf_xt_init, .walk = tcf_xt_walker, .lookup = tcf_xt_search, - .delete = tcf_xt_delete, .size = sizeof(struct tcf_ipt), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 38fd20f10f67..8bf66d0a6800 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -395,13 +395,6 @@ static void tcf_mirred_put_dev(struct net_device *dev) dev_put(dev); } -static int tcf_mirred_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .type = TCA_ACT_MIRRED, @@ -416,7 +409,6 @@ static struct tc_action_ops act_mirred_ops = { .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, .put_dev = tcf_mirred_put_dev, - .delete = tcf_mirred_delete, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 822e903bfc25..4313aa102440 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -300,13 +300,6 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_nat_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_nat_ops = { .kind = "nat", .type = TCA_ACT_NAT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .init = tcf_nat_init, .walk = tcf_nat_walker, .lookup = tcf_nat_search, - .delete = tcf_nat_delete, .size = sizeof(struct tcf_nat), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8a7a7cb94e83..107034070019 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -460,13 +460,6 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_pedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .type = TCA_ACT_PEDIT, @@ -477,7 +470,6 @@ static struct tc_action_ops act_pedit_ops = { .init = tcf_pedit_init, .walk = tcf_pedit_walker, .lookup = tcf_pedit_search, - .delete = tcf_pedit_delete, .size = sizeof(struct tcf_pedit), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 06f0742db593..5d8bfa878477 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -320,13 +320,6 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_police_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_idr_delete_index(tn, index); -} - MODULE_AUTHOR("Alexey Kuznetsov"); MODULE_DESCRIPTION("Policing actions"); MODULE_LICENSE("GPL"); @@ -340,7 +333,6 @@ static struct tc_action_ops act_police_ops = { .init = tcf_police_init, .walk = tcf_police_walker, .lookup = tcf_police_search, - .delete = tcf_police_delete, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 207b4132d1b0..44e9c00657bc 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -232,13 +232,6 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_sample_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_sample_ops = { .kind = "sample", .type = TCA_ACT_SAMPLE, @@ -249,7 +242,6 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, - .delete = tcf_sample_delete, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e616523ba3c1..52400d49f81f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -196,13 +196,6 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_simp_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_simp_ops = { .kind = "simple", .type = TCA_ACT_SIMP, @@ -213,7 +206,6 @@ static struct tc_action_ops act_simp_ops = { .init = tcf_simp_init, .walk = tcf_simp_walker, .lookup = tcf_simp_search, - .delete = tcf_simp_delete, .size = sizeof(struct tcf_defact), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 926d7bc4a89d..73e44ce2a883 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -299,13 +299,6 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbedit_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .type = TCA_ACT_SKBEDIT, @@ -316,7 +309,6 @@ static struct tc_action_ops act_skbedit_ops = { .cleanup = tcf_skbedit_cleanup, .walk = tcf_skbedit_walker, .lookup = tcf_skbedit_search, - .delete = tcf_skbedit_delete, .size = sizeof(struct tcf_skbedit), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d6a1af0c4171..588077fafd6c 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -259,13 +259,6 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_skbmod_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .type = TCA_ACT_SKBMOD, @@ -276,7 +269,6 @@ static struct tc_action_ops act_skbmod_ops = { .cleanup = tcf_skbmod_cleanup, .walk = tcf_skbmod_walker, .lookup = tcf_skbmod_search, - .delete = tcf_skbmod_delete, .size = sizeof(struct tcf_skbmod), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8f09cf08d8fe..420759153d5f 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -548,13 +548,6 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tunnel_key_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .type = TCA_ACT_TUNNEL_KEY, @@ -565,7 +558,6 @@ static struct tc_action_ops act_tunnel_key_ops = { .cleanup = tunnel_key_release, .walk = tunnel_key_walker, .lookup = tunnel_key_search, - .delete = tunnel_key_delete, .size = sizeof(struct tcf_tunnel_key), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 209e70ad2c09..033d273afe50 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -296,13 +296,6 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } -static int tcf_vlan_delete(struct net *net, u32 index) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_idr_delete_index(tn, index); -} - static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .type = TCA_ACT_VLAN, @@ -313,7 +306,6 @@ static struct tc_action_ops act_vlan_ops = { .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, - .delete = tcf_vlan_delete, .size = sizeof(struct tcf_vlan), }; From b144e7ec51a132eac00a68bf897b6349d810022f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:07 -0700 Subject: [PATCH 16/77] net_sched: remove unused parameter for tcf_action_delete() Fixes: 16af6067392c ("net: sched: implement reference counted action release") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 00bf7d2b0bdd..ba55226928a3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1177,8 +1177,7 @@ err_out: return err; } -static int tcf_action_delete(struct net *net, struct tc_action *actions[], - struct netlink_ext_ack *extack) +static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { int i; @@ -1227,7 +1226,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } /* now do the delete */ - ret = tcf_action_delete(net, actions, extack); + ret = tcf_action_delete(net, actions); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); From 7d485c451fc82f8ae431cdb379521bc6d0641064 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:08 -0700 Subject: [PATCH 17/77] net_sched: remove unused tcf_idr_check() tcf_idr_check() is replaced by tcf_idr_check_alloc(), and __tcf_idr_check() now can be folded into tcf_idr_search(). Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 22 +++------------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index e32708491d83..eaa0e8b93d5b 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -147,8 +147,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind); int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ba55226928a3..d76948f02a02 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -300,21 +300,17 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, - struct tc_action **a, int bind) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; spin_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); - if (IS_ERR(p)) { + if (IS_ERR(p)) p = NULL; - } else if (p) { + else if (p) refcount_inc(&p->tcfa_refcnt); - if (bind) - atomic_inc(&p->tcfa_bindcnt); - } spin_unlock(&idrinfo->lock); if (p) { @@ -323,20 +319,8 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index, } return false; } - -int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) -{ - return __tcf_idr_check(tn, index, a, 0); -} EXPORT_SYMBOL(tcf_idr_search); -bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) -{ - return __tcf_idr_check(tn, index, a, bind); -} -EXPORT_SYMBOL(tcf_idr_check); - static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; From 244cd96adb5f5ab39551081fb1f9009a54bb12ee Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:09 -0700 Subject: [PATCH 18/77] net_sched: remove list_head from tc_action After commit 90b73b77d08e, list_head is no longer needed. Now we just need to convert the list iteration to array iteration for drivers. Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to actions") Cc: Jiri Pirko Cc: Vlad Buslov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 ++--- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 10 +++----- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 5 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++--- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++++++------- .../net/ethernet/mellanox/mlxsw/spectrum.c | 3 +-- .../ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++--- .../ethernet/netronome/nfp/flower/action.c | 6 ++--- .../net/ethernet/qlogic/qede/qede_filter.c | 6 ++--- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 5 ++-- include/net/act_api.h | 1 - include/net/pkt_cls.h | 25 +++++++++++-------- net/dsa/slave.c | 4 +-- net/sched/act_api.c | 1 - 14 files changed, 43 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 139d96c5a023..092c817f8f11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, struct tcf_exts *tc_exts) { const struct tc_action *tc_act; - LIST_HEAD(tc_actions); - int rc; + int i, rc; if (!tcf_exts_has_actions(tc_exts)) { netdev_info(bp->dev, "no actions"); return -EINVAL; } - tcf_exts_to_list(tc_exts, &tc_actions); - list_for_each_entry(tc_act, &tc_actions, list) { + tcf_exts_for_each_action(i, tc_act, tc_exts) { /* Drop action */ if (is_tcf_gact_shot(tc_act)) { actions->flags |= BNXT_TC_ACTION_FLAG_DROP; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 623f73dd7738..c116f96956fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { const struct tc_action *a; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { fs->action = FILTER_PASS; } else if (is_tcf_gact_shot(a)) { @@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, bool act_redir = false; bool act_pedit = false; bool act_vlan = false; - LIST_HEAD(actions); + int i; - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, cls->exts) { if (is_tcf_gact_ok(a)) { /* Do nothing */ } else if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 18eb2aedd4cb..c7d2b4dc7568 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap, unsigned int num_actions = 0; const struct tc_action *a; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { /* Don't allow more than one action per rule. */ if (num_actions) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 447098005490..af4c9ae7f432 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9171,14 +9171,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; - LIST_HEAD(actions); + int i; if (!tcf_exts_has_actions(exts)) return -EINVAL; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - + tcf_exts_for_each_action(i, a, exts) { /* Drop action */ if (is_tcf_gact_shot(a)) { *action = IXGBE_FDIR_DROP_QUEUE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9131a1376e7d..9fed54017659 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1982,14 +1982,15 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { + int k; + if (!is_tcf_pedit(a)) continue; nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); + for (k = 0; k < nkeys; k++) { + htype = tcf_pedit_htype(a, k); if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { modify_ip_header = true; @@ -2053,15 +2054,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, const struct tc_action *a; LIST_HEAD(actions); u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -2666,7 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); bool encap = false; u32 action = 0; - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return -EINVAL; @@ -2674,8 +2674,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_shot(a)) { action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6070d1591d1e..930700413b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1346,8 +1346,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(f->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(f->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct mlxsw_sp_port_mall_mirror_tc_entry *mirror; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ebd1b24ebaa5..8d211972c5e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -21,8 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { const struct tc_action *a; - LIST_HEAD(actions); - int err; + int err, i; if (!tcf_exts_has_actions(exts)) return 0; @@ -32,8 +31,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { if (is_tcf_gact_ok(a)) { err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0ba0356ec4e6..9044496803e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -796,11 +796,10 @@ int nfp_flower_compile_action(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err, tun_out_cnt, out_cnt; + int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; enum nfp_flower_tun_type tun_type; const struct tc_action *a; u32 csum_updated = 0; - LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; @@ -810,8 +809,7 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - tcf_exts_to_list(flow->exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, flow->exts) { err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9673d19308e6..b16ce7d93caf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2006,18 +2006,16 @@ unlock: static int qede_parse_actions(struct qede_dev *edev, struct tcf_exts *exts) { - int rc = -EINVAL, num_act = 0; + int rc = -EINVAL, num_act = 0, i; const struct tc_action *a; bool is_drop = false; - LIST_HEAD(actions); if (!tcf_exts_has_actions(exts)) { DP_NOTICE(edev, "No tc actions received\n"); return rc; } - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { + tcf_exts_for_each_action(i, a, exts) { num_act++; if (is_tcf_gact_shot(a)) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1a96dd9c1091..531294f4978b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -61,7 +61,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, struct stmmac_tc_entry *action_entry = entry; const struct tc_action *act; struct tcf_exts *exts; - LIST_HEAD(actions); + int i; exts = cls->knode.exts; if (!tcf_exts_has_actions(exts)) @@ -69,8 +69,7 @@ static int tc_fill_actions(struct stmmac_tc_entry *entry, if (frag) action_entry = frag; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(act, &actions, list) { + tcf_exts_for_each_action(i, act, exts) { /* Accept */ if (is_tcf_gact_ok(act)) { action_entry->val.af = 1; diff --git a/include/net/act_api.h b/include/net/act_api.h index eaa0e8b93d5b..f9c4b871af88 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -23,7 +23,6 @@ struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; - struct list_head list; struct tcf_idrinfo *idrinfo; u32 tcfa_index; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ef727f71336e..c17d51865469 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -298,19 +298,13 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) #endif } -static inline void tcf_exts_to_list(const struct tcf_exts *exts, - struct list_head *actions) -{ #ifdef CONFIG_NET_CLS_ACT - int i; - - for (i = 0; i < exts->nr_actions; i++) { - struct tc_action *a = exts->actions[i]; - - list_add_tail(&a->list, actions); - } +#define tcf_exts_for_each_action(i, a, exts) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) +#else +#define tcf_exts_for_each_action(i, a, exts) \ + for (; 0; ) #endif -} static inline void tcf_exts_stats_update(const struct tcf_exts *exts, @@ -361,6 +355,15 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->actions[0]; +#else + return NULL; +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 962c4fd338ba..1c45c1d6d241 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -767,7 +767,6 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, const struct tc_action *a; struct dsa_port *to_dp; int err = -EOPNOTSUPP; - LIST_HEAD(actions); if (!ds->ops->port_mirror_add) return err; @@ -775,8 +774,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!tcf_exts_has_one_action(cls->exts)) return err; - tcf_exts_to_list(cls->exts, &actions); - a = list_first_entry(&actions, struct tc_action, list); + a = tcf_exts_first_action(cls->exts); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d76948f02a02..db83dac1e7f4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -391,7 +391,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->idrinfo = idrinfo; p->ops = ops; - INIT_LIST_HEAD(&p->list); *a = p; return 0; err3: From a0c2e90fe131d9a7440ac05c9c31fc35dfac2fa8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:10 -0700 Subject: [PATCH 19/77] net_sched: remove unused tcfa_capab Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index f9c4b871af88..970303448c90 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -28,7 +28,6 @@ struct tc_action { u32 tcfa_index; refcount_t tcfa_refcnt; atomic_t tcfa_bindcnt; - u32 tcfa_capab; int tcfa_action; struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; @@ -43,7 +42,6 @@ struct tc_action { #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt -#define tcf_capab common.tcfa_capab #define tcf_action common.tcfa_action #define tcf_tm common.tcfa_tm #define tcf_bstats common.tcfa_bstats From 8ce5be1c899d31f3cd047e5f707cd9dbfb81e076 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:11 -0700 Subject: [PATCH 20/77] Revert "net: sched: act_ife: disable bh when taking ife_mod_lock" This reverts commit 42c625a486f3 ("net: sched: act_ife: disable bh when taking ife_mod_lock"), because what ife_mod_lock protects is absolutely not touched in rate est timer BH context, they have no race. A better fix is following up. Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 92fcf8ba5bca..9decbb74b3ac 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -167,16 +167,16 @@ static struct tcf_meta_ops *find_ife_oplist(u16 metaid) { struct tcf_meta_ops *o; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { if (o->metaid == metaid) { if (!try_module_get(o->owner)) o = NULL; - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return o; } } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); return NULL; } @@ -190,12 +190,12 @@ int register_ife_op(struct tcf_meta_ops *mops) !mops->get || !mops->alloc) return -EINVAL; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid || (strcmp(mops->name, m->name) == 0)) { - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return -EEXIST; } } @@ -204,7 +204,7 @@ int register_ife_op(struct tcf_meta_ops *mops) mops->release = ife_release_meta_gen; list_add_tail(&mops->list, &ifeoplist); - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return 0; } EXPORT_SYMBOL_GPL(unregister_ife_op); @@ -214,7 +214,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) struct tcf_meta_ops *m; int err = -ENOENT; - write_lock_bh(&ife_mod_lock); + write_lock(&ife_mod_lock); list_for_each_entry(m, &ifeoplist, list) { if (m->metaid == mops->metaid) { list_del(&mops->list); @@ -222,7 +222,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops) break; } } - write_unlock_bh(&ife_mod_lock); + write_unlock(&ife_mod_lock); return err; } @@ -343,13 +343,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; - read_lock_bh(&ife_mod_lock); + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } - read_unlock_bh(&ife_mod_lock); + read_unlock(&ife_mod_lock); if (installed) return 0; From 4e407ff5cd67ec76eeeea1deec227b7982dc7f66 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:12 -0700 Subject: [PATCH 21/77] act_ife: move tcfa_lock down to where necessary The only time we need to take tcfa_lock is when adding a new metainfo to an existing ife->metalist. We don't need to take tcfa_lock so early and so broadly in tcf_ife_init(). This means we can always take ife_mod_lock first, avoid the reverse locking ordering warning as reported by Vlad. Reported-by: Vlad Buslov Tested-by: Vlad Buslov Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9decbb74b3ac..244a8cf48183 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -265,11 +265,8 @@ static const char *ife_meta_id2name(u32 metaid) #endif /* called when adding new meta information - * under ife->tcf_lock for existing action */ -static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len, bool exists, - bool rtnl_held) +static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -277,15 +274,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - if (exists) - spin_unlock_bh(&ife->tcf_lock); if (rtnl_held) rtnl_unlock(); request_module("ife-meta-%s", ife_meta_id2name(metaid)); if (rtnl_held) rtnl_lock(); - if (exists) - spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -302,10 +295,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic) + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -332,12 +324,16 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, } } + if (exists) + spin_lock_bh(&ife->tcf_lock); list_add_tail(&mi->metalist, &ife->metalist); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; @@ -345,7 +341,7 @@ static int use_all_metadata(struct tcf_ife_info *ife) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true); + rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -422,7 +418,6 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } -/* under ife->tcf_lock for existing action */ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -436,12 +431,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len, exists, - rtnl_held); + rc = load_metaops_and_vet(i, val, len, rtnl_held); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(ife, i, val, len, false, exists); if (rc) return rc; } @@ -540,8 +534,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p->eth_type = ife_type; } - if (exists) - spin_lock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -551,10 +543,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL, NULL); if (err) { metadata_parse_err: - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } @@ -569,17 +558,16 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { - if (exists) - spin_unlock_bh(&ife->tcf_lock); tcf_idr_release(*a, bind); - kfree(p); return err; } } + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ rcu_swap_protected(ife->params, p, 1); From 5ffe57da29b3802baeddaa40909682bbb4cb4d48 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:13 -0700 Subject: [PATCH 22/77] act_ife: fix a potential deadlock use_all_metadata() acquires read_lock(&ife_mod_lock), then calls add_metainfo() which calls find_ife_oplist() which acquires the same lock again. Deadlock! Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops as an additional parameter and let its callers to decide how to find it. For use_all_metadata(), it already has ops, no need to find it again, just call __add_metainfo() directly. And, as ife_mod_lock is only needed for find_ife_oplist(), this means we can make non-atomic allocation for populate_metalist() now. Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 244a8cf48183..196430aefe87 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -296,22 +296,16 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) /* called when adding new meta information */ -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) +static int __add_metainfo(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; - struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; - if (!ops) - return -ENOENT; - mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (!mi) { - /*put back what find_ife_oplist took */ - module_put(ops->owner); + if (!mi) return -ENOMEM; - } mi->metaid = metaid; mi->ops = ops; @@ -319,7 +313,6 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); - module_put(ops->owner); return ret; } } @@ -333,6 +326,21 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } +static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool exists) +{ + const struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret; + + if (!ops) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + if (ret) + /*put back what find_ife_oplist took */ + module_put(ops->owner); + return ret; +} + static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; @@ -341,7 +349,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); + rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -435,7 +443,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, false, exists); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } From 96c26e04581667e3cd17ed74c2fc3499afea49b8 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 20 Aug 2018 09:54:25 +0900 Subject: [PATCH 23/77] xsk: fix return value of xdp_umem_assign_dev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/ENOTSUPP/EOPNOTSUPP/ in function umem_assign_dev(). This function's return value is directly returned by xsk_bind(). EOPNOTSUPP is bind()'s possible return value. Fixes: f734607e819b ("xsk: refactor xdp_umem_assign_dev()") Signed-off-by: Prashant Bhole Acked-by: Song Liu Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 911ca6d3cb5a..bfe2dbea480b 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) - return force_zc ? -ENOTSUPP : 0; /* fail or fallback */ + return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */ bpf.command = XDP_QUERY_XSK_UMEM; rtnl_lock(); err = xdp_umem_query(dev, queue_id); if (err) { - err = err < 0 ? -ENOTSUPP : -EBUSY; + err = err < 0 ? -EOPNOTSUPP : -EBUSY; goto err_rtnl_unlock; } From b845c898b2f1ea458d5453f0fa1da6e2dfce3bb4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Aug 2018 15:55:00 +0200 Subject: [PATCH 24/77] bpf, sockmap: fix sock_hash_alloc and reject zero-sized keys Currently, it is possible to create a sock hash map with key size of 0 and have the kernel return a fd back to user space. This is invalid for hash maps (and kernel also hasn't been tested for zero key size support in general at this point). Thus, reject such configuration. Fixes: 81110384441a ("bpf: sockmap, add hash map support") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Song Liu --- kernel/bpf/sockmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 98e621a29e8e..60ceb0e1fa56 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2140,7 +2140,9 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return ERR_PTR(-EPERM); /* check sanity of attributes */ - if (attr->max_entries == 0 || attr->value_size != 4 || + if (attr->max_entries == 0 || + attr->key_size == 0 || + attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); From eb29429d81e31b191f3b2bd19cf820279cec6463 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Aug 2018 18:09:17 +0200 Subject: [PATCH 25/77] bpf, sockmap: fix sock hash count in alloc_sock_hash_elem When we try to allocate a new sock hash entry and the allocation fails, then sock hash map fails to reduce the map element counter, meaning we keep accounting this element although it was never used. Fix it by dropping the element counter on error. Fixes: 81110384441a ("bpf: sockmap, add hash map support") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend --- kernel/bpf/sockmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 60ceb0e1fa56..40c6ef9fc828 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2269,8 +2269,10 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) + if (!l_new) { + atomic_dec(&htab->count); return ERR_PTR(-ENOMEM); + } memcpy(l_new->key, key, key_size); l_new->sk = sk; From 67db7cd249e71f64346f481b629724376d063e08 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:32 -0700 Subject: [PATCH 26/77] tls: possible hang when do_tcp_sendpages hits sndbuf is full case Currently, the lower protocols sk_write_space handler is not called if TLS is sending a scatterlist via tls_push_sg. However, normally tls_push_sg calls do_tcp_sendpage, which may be under memory pressure, that in turn may trigger a wait via sk_wait_event. Typically, this happens when the in-flight bytes exceed the sdnbuf size. In the normal case when enough ACKs are received sk_write_space() will be called and the sk_wait_event will be woken up allowing it to send more data and/or return to the user. But, in the TLS case because the sk_write_space() handler does not wake up the events the above send will wait until the sndtimeo is exceeded. By default this is MAX_SCHEDULE_TIMEOUT so it look like a hang to the user (especially this impatient user). To fix this pass the sk_write_space event to the lower layers sk_write_space event which in the TCP case will wake any pending events. I observed the above while integrating sockmap and ktls. It initially appeared as test_sockmap (modified to use ktls) occasionally hanging. To reliably reproduce this reduce the sndbuf size and stress the tls layer by sending many 1B sends. This results in every byte needing a header and each byte individually being sent to the crypto layer. Signed-off-by: John Fastabend Acked-by: Dave Watson Signed-off-by: Daniel Borkmann --- net/tls/tls_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 93c0c225ab34..180b6640e531 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -213,9 +213,14 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* We are already sending pages, ignore notification */ - if (ctx->in_tcp_sendpages) + /* If in_tcp_sendpages call lower protocol write space handler + * to ensure we wake up any waiting operations there. For example + * if do_tcp_sendpages where to call sk_wait_event. + */ + if (ctx->in_tcp_sendpages) { + ctx->sk_write_space(sk); return; + } if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; From 9b2e0388bec8ec5427403e23faff3b58dd1c3200 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:37 -0700 Subject: [PATCH 27/77] bpf: sockmap: write_space events need to be passed to TCP handler When sockmap code is using the stream parser it also handles the write space events in order to handle the case where (a) verdict redirects skb to another socket and (b) the sockmap then sends the skb but due to memory constraints (or other EAGAIN errors) needs to do a retry. But the initial code missed a third case where the skb_send_sock_locked() triggers an sk_wait_event(). A typically case would be when sndbuf size is exceeded. If this happens because we do not pass the write_space event to the lower layers we never wake up the event and it will wait for sndtimeo. Which as noted in ktls fix may be rather large and look like a hang to the user. To reproduce the best test is to reduce the sndbuf size and send 1B data chunks to stress the memory handling. To fix this pass the event from the upper layer to the lower layer. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 40c6ef9fc828..cf5195c7c331 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1427,12 +1427,15 @@ out: static void smap_write_space(struct sock *sk) { struct smap_psock *psock; + void (*write_space)(struct sock *sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state))) schedule_work(&psock->tx_work); + write_space = psock->save_write_space; rcu_read_unlock(); + write_space(sk); } static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) From 00e1cae78120ee19462e7f96135cd1cc59a086e7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 22 Aug 2018 00:02:19 +0200 Subject: [PATCH 28/77] net: ethernet: renesas: use SPDX identifier for Renesas drivers Signed-off-by: Wolfram Sang Acked-by: Sergei Shtylyov Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 5 +---- drivers/net/ethernet/renesas/ravb_main.c | 5 +---- drivers/net/ethernet/renesas/sh_eth.c | 13 +------------ drivers/net/ethernet/renesas/sh_eth.h | 13 +------------ 4 files changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index b81f4faf7b10..1470fc12282b 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #ifndef __RAVB_H__ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c06f2df895c2..aff5516b781e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet AVB device driver * * Copyright (C) 2014-2015 Renesas Electronics Corporation @@ -5,10 +6,6 @@ * Copyright (C) 2015-2016 Cogent Embedded, Inc. * * Based on the SuperH Ethernet driver - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License version 2, - * as published by the Free Software Foundation. */ #include diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5573199c4536..ad4433d59237 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* SuperH Ethernet device driver * * Copyright (C) 2014 Renesas Electronics Corporation @@ -5,18 +6,6 @@ * Copyright (C) 2008-2014 Renesas Solutions Corp. * Copyright (C) 2013-2017 Cogent Embedded, Inc. * Copyright (C) 2014 Codethink Limited - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #include diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index f94be99cf400..0c18650bbfe6 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu * Copyright (C) 2008-2012 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". */ #ifndef __SH_ETH_H__ From 3d0371b313b84ba7c16ebf2526a7a34f1c57b19e Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 22 Aug 2018 14:57:44 +1000 Subject: [PATCH 29/77] net/ncsi: Fixup .dumpit message flags and ID check in Netlink handler The ncsi_pkg_info_all_nl() .dumpit handler is missing the NLM_F_MULTI flag, causing additional package information after the first to be lost. Also fixup a sanity check in ncsi_write_package_info() to reject out of range package IDs. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 82e6edf9c5d9..45f33d6dedf7 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -100,7 +100,7 @@ static int ncsi_write_package_info(struct sk_buff *skb, bool found; int rc; - if (id > ndp->package_num) { + if (id > ndp->package_num - 1) { netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id); return -ENODEV; } @@ -240,7 +240,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, return 0; /* done */ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); + &ncsi_genl_family, NLM_F_MULTI, NCSI_CMD_PKG_INFO); if (!hdr) { rc = -EMSGSIZE; goto err; From 93cfb6c17690c465509967aeb237717d10513a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 22 Aug 2018 12:29:43 +0200 Subject: [PATCH 30/77] sch_cake: Fix TC filter flow override and expand it to hosts as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TC filter flow mapping override completely skipped the call to cake_hash(); however that meant that the internal state was not being updated, which ultimately leads to deadlocks in some configurations. Fix that by passing the overridden flow ID into cake_hash() instead so it can react appropriately. In addition, the major number of the class ID can now be set to override the host mapping in host isolation mode. If both host and flow are overridden (or if the respective modes are disabled), flow dissection and hashing will be skipped entirely; otherwise, the hashing will be kept for the portions that are not set by the filter. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 4d26b0823cdf..c07c30b916d5 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -620,15 +620,20 @@ static bool cake_ddst(int flow_mode) } static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, - int flow_mode) + int flow_mode, u16 flow_override, u16 host_override) { - u32 flow_hash = 0, srchost_hash, dsthost_hash; + u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; u16 reduced_hash, srchost_idx, dsthost_idx; struct flow_keys keys, host_keys; if (unlikely(flow_mode == CAKE_FLOW_NONE)) return 0; + /* If both overrides are set we can skip packet dissection entirely */ + if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && + (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) + goto skip_hash; + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -675,6 +680,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, if (flow_mode & CAKE_FLOW_FLOWS) flow_hash = flow_hash_from_keys(&keys); +skip_hash: + if (flow_override) + flow_hash = flow_override - 1; + if (host_override) { + dsthost_hash = host_override - 1; + srchost_hash = host_override - 1; + } + if (!(flow_mode & CAKE_FLOW_FLOWS)) { if (flow_mode & CAKE_FLOW_SRC_IP) flow_hash ^= srchost_hash; @@ -1570,7 +1583,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, struct cake_sched_data *q = qdisc_priv(sch); struct tcf_proto *filter; struct tcf_result res; - u32 flow = 0; + u16 flow = 0, host = 0; int result; filter = rcu_dereference_bh(q->filter_list); @@ -1594,10 +1607,12 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, #endif if (TC_H_MIN(res.classid) <= CAKE_QUEUES) flow = TC_H_MIN(res.classid); + if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16)) + host = TC_H_MAJ(res.classid) >> 16; } hash: *t = cake_select_tin(sch, skb); - return flow ?: cake_hash(*t, skb, flow_mode) + 1; + return cake_hash(*t, skb, flow_mode, flow, host) + 1; } static void cake_reconfigure(struct Qdisc *sch); From 191672ca07a7c10c3b84d01019a33d59b4317997 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Aug 2018 17:25:44 +0200 Subject: [PATCH 31/77] net_sched: fix unused variable warning in stmmac The new tcf_exts_for_each_action() macro doesn't reference its arguments when CONFIG_NET_CLS_ACT is disabled, which leads to a harmless warning in at least one driver: drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_fill_actions': drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:64:6: error: unused variable 'i' [-Werror=unused-variable] Adding a cast to void lets us avoid this kind of warning. To be on the safe side, do it for all three arguments, not just the one that caused the warning. Fixes: 244cd96adb5f ("net_sched: remove list_head from tc_action") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c17d51865469..75a3f3fdb359 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -303,7 +303,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = (exts)->actions[i]); i++) #else #define tcf_exts_for_each_action(i, a, exts) \ - for (; 0; ) + for (; 0; (void)(i), (void)(a), (void)(exts)) #endif static inline void From e500c6d349f7f36120886841c6f057ce248b48b2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 Aug 2018 12:58:34 -0700 Subject: [PATCH 32/77] addrconf: reduce unnecessary atomic allocations All the 3 callers of addrconf_add_mroute() assert RTNL lock, they don't take any additional lock either, so it is safe to convert it to GFP_KERNEL. Same for sit_add_v4_addrs(). Cc: David Ahern Signed-off-by: Cong Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fac4ad74867..d51a8c0b3372 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2398,7 +2398,7 @@ static void addrconf_add_mroute(struct net_device *dev) ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, GFP_ATOMIC, NULL); + ip6_route_add(&cfg, GFP_KERNEL, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -3062,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, - GFP_ATOMIC); + GFP_KERNEL); return; } @@ -3087,7 +3087,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, 0, idev->dev, - 0, pflags, GFP_ATOMIC); + 0, pflags, GFP_KERNEL); } } } From 431280eebed9f5079553daf003011097763e71fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2018 13:30:45 -0700 Subject: [PATCH 33/77] ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state tcp uses per-cpu (and per namespace) sockets (net->ipv4.tcp_sk) internally to send some control packets. 1) RST packets, through tcp_v4_send_reset() 2) ACK packets in SYN-RECV and TIME-WAIT state, through tcp_v4_send_ack() These packets assert IP_DF, and also use the hashed IP ident generator to provide an IPv4 ID number. Geoff Alexander reported this could be used to build off-path attacks. These packets should not be fragmented, since their size is smaller than IPV4_MIN_MTU. Only some tunneled paths could eventually have to fragment, regardless of inner IPID. We really can use zero IPID, to address the flaw, and as a bonus, avoid a couple of atomic operations in ip_idents_reserve() Signed-off-by: Eric Dumazet Reported-by: Geoff Alexander Tested-by: Geoff Alexander Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9e041fa5c545..44c09eddbb78 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2517,6 +2517,12 @@ static int __net_init tcp_sk_init(struct net *net) if (res) goto fail; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } From fb99886224294b2291d267da41395022fa4200e2 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:14 -0400 Subject: [PATCH 34/77] tcp_bbr: add bbr_check_probe_rtt_done() helper This patch add a helper function bbr_check_probe_rtt_done() to 1. check the condition to see if bbr should exit probe_rtt mode; 2. process the logic of exiting probe_rtt mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 13d34427ca3d..fd7bccf36a26 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -95,11 +95,10 @@ struct bbr { u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ - restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:12, + unused:13, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -396,17 +395,11 @@ static bool bbr_set_cwnd_to_recover_or_restore( cwnd = tcp_packets_in_flight(tp) + acked; } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { /* Exiting loss recovery; restore cwnd saved before recovery. */ - bbr->restore_cwnd = 1; + cwnd = max(cwnd, bbr->prior_cwnd); bbr->packet_conservation = 0; } bbr->prev_ca_state = state; - if (bbr->restore_cwnd) { - /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->restore_cwnd = 0; - } - if (bbr->packet_conservation) { *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); return true; /* yes, using packet conservation */ @@ -748,6 +741,20 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr_reset_mode(sk); +} + /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and * periodically drain the bottleneck queue, to converge to measure the true * min_rtt (unloaded propagation delay). This allows the flows to keep queues @@ -806,12 +813,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) } else if (bbr->probe_rtt_done_stamp) { if (bbr->round_start) bbr->probe_rtt_round_done = 1; - if (bbr->probe_rtt_round_done && - after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_jiffies32; - bbr->restore_cwnd = 1; /* snap to prior_cwnd */ - bbr_reset_mode(sk); - } + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); } } /* Restart after idle ends only once we process a new S/ACK for data */ @@ -862,7 +865,6 @@ static void bbr_init(struct sock *sk) bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); - bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; bbr->full_bw_reached = 0; From 5490b32dce6932ea7ee8e3b2f76db2957c92af6e Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:15 -0400 Subject: [PATCH 35/77] tcp_bbr: in restart from idle, see if we should exit PROBE_RTT This patch fix the case where BBR does not exit PROBE_RTT mode when it restarts from idle. When BBR restarts from idle and if BBR is in PROBE_RTT mode, BBR should check if it's time to exit PROBE_RTT. If yes, then BBR should exit PROBE_RTT mode and restore the cwnd to its full value. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index fd7bccf36a26..1d4bdd3b5e4d 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -174,6 +174,8 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +static void bbr_check_probe_rtt_done(struct sock *sk); + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -308,6 +310,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) */ if (bbr->mode == BBR_PROBE_BW) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); } } From 8e995bf14fdb7e33681d5c3312b602fa342b878a Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Wed, 22 Aug 2018 17:43:16 -0400 Subject: [PATCH 36/77] tcp_bbr: apply PROBE_RTT cwnd cap even if acked==0 This commit fixes a corner case where TCP BBR would enter PROBE_RTT mode but not reduce its cwnd. If a TCP receiver ACKed less than one full segment, the number of delivered/acked packets was 0, so that bbr_set_cwnd() would short-circuit and exit early, without cutting cwnd to the value we want for PROBE_RTT. The fix is to instead make sure that even when 0 full packets are ACKed, we do apply all the appropriate caps, including the cap that applies in PROBE_RTT mode. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Kevin Yang Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 1d4bdd3b5e4d..02ff2dde9609 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -420,10 +420,10 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = 0, target_cwnd = 0; + u32 cwnd = tp->snd_cwnd, target_cwnd = 0; if (!acked) - return; + goto done; /* no packet fully ACKed; just apply caps */ if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; From 3ed614dce3ca9912d22be215ff0f11104b69fe62 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:10 +0800 Subject: [PATCH 37/77] net: hns: fix length and page_offset overflow when CONFIG_ARM64_64K_PAGES When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of length and page_offset are u16, they will overflow. So change them to u32. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index fa5b30f547f6..cad52bd331f7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -220,10 +220,10 @@ struct hnae_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; + u32 page_offset; + u32 length; /* length of the buffer */ - u16 length; /* length of the buffer */ + u16 reuse_flag; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; From ac4a5b52f5970479f4b2d94a7f98dbf9eaf675ab Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:11 +0800 Subject: [PATCH 38/77] net: hns: modify variable type in hns_nic_reuse_page 'truesize' is supposed to be u32, not int, so fix it. Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 9f2b552aee33..c8c0b0309c27 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -512,7 +512,8 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, struct hnae_desc_cb *desc_cb) { struct hnae_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; From b1ccd4c0ab6ef499f47dd84ed4920502a7147bba Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:12 +0800 Subject: [PATCH 39/77] net: hns: fix skb->truesize underestimation skb->truesize is not meant to be tracking amount of used bytes in a skb, but amount of reserved/consumed bytes in memory. For instance, if we use a single byte in last page fragment, we have to account the full size of the fragment. So skb_add_rx_frag needs to calculate the length of the entire buffer into turesize. Fixes: 9cbe9fd5214e ("net: hns: optimize XGE capability by reducing cpu usage") Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c8c0b0309c27..71bd3bff6c67 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -531,7 +531,7 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, } skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); + size - pull_len, truesize); /* avoid re-using remote pages,flag default unreuse */ if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id())) From 339379a2fb13decd1802b403370cc3cdf21d819f Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:13 +0800 Subject: [PATCH 40/77] net: hns: use eth_get_headlen interface instead of hns_nic_get_headlen Update hns to drop the hns_nic_get_headlen function in favour of eth_get_headlen, and hence also removes now redundant hns_nic_get_headlen. Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 103 +----------------- 1 file changed, 1 insertion(+), 102 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 71bd3bff6c67..02a0ba20fad5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -406,107 +406,6 @@ out_net_tx_busy: return NETDEV_TX_BUSY; } -/** - * hns_nic_get_headlen - determine size of header for RSC/LRO/GRO/FCOE - * @data: pointer to the start of the headers - * @max: total length of section to find headers in - * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, GRO, and RSC offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. - **/ -static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, - unsigned int max_size) -{ - unsigned char *network; - u8 hlen; - - /* this should never happen, but better safe than sorry */ - if (max_size < ETH_HLEN) - return max_size; - - /* initialize network frame pointer */ - network = data; - - /* set first protocol and move network header forward */ - network += ETH_HLEN; - - /* handle any vlan tag if present */ - if (hnae_get_field(flag, HNS_RXD_VLAN_M, HNS_RXD_VLAN_S) - == HNS_RX_FLAG_VLAN_PRESENT) { - if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN)) - return max_size; - - network += VLAN_HLEN; - } - - /* handle L3 protocols */ - if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV4) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct iphdr))) - return max_size; - - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (network[0] & 0x0F) << 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct iphdr)) - return network - data; - - /* record next protocol if header is present */ - } else if (hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S) - == HNS_RX_FLAG_L3ID_IPV6) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct ipv6hdr))) - return max_size; - - /* record next protocol */ - hlen = sizeof(struct ipv6hdr); - } else { - return network - data; - } - - /* relocate pointer to start of L4 header */ - network += hlen; - - /* finally sort out TCP/UDP */ - if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_TCP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct tcphdr))) - return max_size; - - /* access doff as a u8 to avoid unaligned access on ia64 */ - hlen = (network[12] & 0xF0) >> 2; - - /* verify hlen meets minimum size requirements */ - if (hlen < sizeof(struct tcphdr)) - return network - data; - - network += hlen; - } else if (hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S) - == HNS_RX_FLAG_L4ID_UDP) { - if ((typeof(max_size))(network - data) > - (max_size - sizeof(struct udphdr))) - return max_size; - - network += sizeof(struct udphdr); - } - - /* If everything has gone correctly network should be the - * data section of the packet and will be the end of the header. - * If not then it probably represents the end of the last recognized - * header. - */ - if ((typeof(max_size))(network - data) < max_size) - return network - data; - else - return max_size; -} - static void hns_nic_reuse_page(struct sk_buff *skb, int i, struct hnae_ring *ring, int pull_len, struct hnae_desc_cb *desc_cb) @@ -696,7 +595,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, } else { ring->stats.seg_pkt_cnt++; - pull_len = hns_nic_get_headlen(va, bnum_flag, HNS_RX_HEAD_SIZE); + pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); From d23c4b6336ef30898dcdff351f21e633e7a64930 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 23 Aug 2018 11:31:37 +0800 Subject: [PATCH 41/77] net/ipv6: init ip6 anycast rt->dst.input as ip6_input Commit 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") forgot to handle anycast route and init anycast rt->dst.input to ip6_forward. Fix it by setting anycast rt->dst.input back to ip6_input. Fixes: 6edb3c96a5f02 ("net/ipv6: Defer initialization of dst to data path") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7208c16302f6..c4ea13e8360b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -956,7 +956,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL) { + if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; From 27a5959308559fa6afcaa4e6cd81d25bcb2dda7c Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:37:15 +0800 Subject: [PATCH 42/77] net: hns3: fix page_offset overflow when CONFIG_ARM64_64K_PAGES When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of page_offset is u16, it will overflow. So change it to u32, when "CONFIG_ARM64_64K_PAGES" enabled. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a02a96aee2a2..cb450d7ec8c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -284,11 +284,11 @@ struct hns3_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; - + u32 page_offset; u32 length; /* length of the buffer */ + u16 reuse_flag; + /* desc type, used by the ring user to mark the type of the priv data */ u16 type; }; From 583e7281f1d8234f3a3e483bd6fba7a72d24aa4e Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:37:16 +0800 Subject: [PATCH 43/77] net: hns3: modify variable type in hns3_nic_reuse_page 'truesize' is supposed to be u32, not int, so fix it. Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3554dca7a680..955c4ab18b03 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2019,7 +2019,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_desc_cb *desc_cb) { struct hns3_desc *desc; - int truesize, size; + u32 truesize; + int size; int last_offset; bool twobufs; From 4381147df9098706caa5cf9fda37e53b2fe4871f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:51 -0700 Subject: [PATCH 44/77] ice: Fix multiple static analyser warnings This patch fixes the following smatch errors: 1) Fix "odd binop '0x0 & 0xc'" when performing the bitwise-and with a constant value of zero (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG). Remove a similar bitwise-and with 0 in ice_add_marker_act() and use the right mask ICE_LG_ACT_GENERIC_OFFSET_M in the expression. 2) Fix a similar issue "odd binop '0x0 & 0x1800' in ice_req_irq_msix_misc. 3) Fix "odd binop '0x380000 & 0x7fff8'" in ice_add_marker_act(). Also, use a new define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX instead of magic number '7'. 4) Fix warn: odd binop '0x0 & 0x18' in ice_set_dflt_vsi_ctx() by removing unnecessary logic to explicitly unset bits 3 and 4 in port_vlan_bits. These bits are unset already by the memset on ctxt->info. Reported-by: Dan Carpenter Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 1 + drivers/net/ethernet/intel/ice/ice_common.c | 25 +++++++++++-------- drivers/net/ethernet/intel/ice/ice_main.c | 19 ++++++-------- drivers/net/ethernet/intel/ice/ice_switch.c | 4 +-- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 7541ec2270b3..6d3e11659ba5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -594,6 +594,7 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S) #define ICE_LG_ACT_GENERIC_PRIORITY_S 22 #define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S) +#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX 7 /* Action = 7 - Set Stat count */ #define ICE_LG_ACT_STAT_COUNT 0x7 diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 71d032cc5fa7..d5300b606d5a 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1619,20 +1619,23 @@ __ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, } /* LUT size is only valid for Global and PF table types */ - if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) { + switch (lut_size) { + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128: + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512: flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) && - (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) { - flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & - ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; - } else { + break; + case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K: + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + break; + } + /* fall-through */ + default: status = ICE_ERR_PARAM; goto ice_aq_get_set_rss_lut_exit; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5299caf55a7f..186e764a469a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1352,14 +1352,13 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* Allow all packets untagged/tagged */ + /* By default bits 3 and 4 in port_vlan_flags are 0's which results in + * legacy behavior (show VLAN, DEI, and UP) in descriptor. Also, allow + * all packets untagged/tagged. + */ ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & ICE_AQ_VSI_PVLAN_MODE_M) >> ICE_AQ_VSI_PVLAN_MODE_S); - /* Show VLAN/UP from packets in Rx descriptors */ - ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH & - ICE_AQ_VSI_PVLAN_EMOD_M) >> - ICE_AQ_VSI_PVLAN_EMOD_S); /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -2058,15 +2057,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) skip_req_irq: ice_ena_misc_vector(pf); - val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) | - PFINT_OICR_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M); wr32(hw, PFINT_OICR_CTL, val); /* This enables Admin queue Interrupt causes */ - val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) | - PFINT_FW_CTL_CAUSE_ENA_M; + val = ((pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M); wr32(hw, PFINT_FW_CTL, val); itr_gran = hw->itr_gran_200; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 723d15f1e90b..6b7ec2ae5ad6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -645,14 +645,14 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); - act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; + act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << + ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; /* Third action Marker value */ act |= ICE_LG_ACT_GENERIC; act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; - act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); /* call the fill switch rule to fill the lookup tx rx structure */ From 6efa6239e7f8777dccddcebb643e6d9a0304bf43 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:28:52 -0700 Subject: [PATCH 45/77] ice: Remove unnecessary node owner check There is already a check for owner == ICE_SCHED_NODE_OWNER_LAN at the beginning of ice_sched_update_vsi_child_nodes. Remove the additional check to address the static analysis tool smatch issue "warn: we tested 'owner' before and it was 'false'". Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2e6c1d92cc88..eeae199469b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1576,8 +1576,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc, return status; } - if (owner == ICE_SCHED_NODE_OWNER_LAN) - vsi->max_lanq[tc] = new_numqs; + vsi->max_lanq[tc] = new_numqs; return status; } From c0203475765f827e7b2eaf0a87222d0766e2cc4b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Aug 2018 23:49:37 +0200 Subject: [PATCH 46/77] bpf: use per htab salt for bucket hash All BPF hash and LRU maps currently have a known and global seed we feed into jhash() which is 0. This is suboptimal, thus fix it by generating a random seed upon hashtab setup time which we can later on feed into jhash() on lookup, update and deletions. Fixes: 0f8e4bd8a1fc8 ("bpf: add hashtable type of eBPF maps") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Reviewed-by: Eduardo Valentin --- kernel/bpf/hashtab.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 04b8eda94e7d..03cc59ee9c95 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "percpu_freelist.h" #include "bpf_lru_list.h" @@ -41,6 +42,7 @@ struct bpf_htab { atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ + u32 hashrnd; }; /* each htab element is struct htab_elem + key + value */ @@ -371,6 +373,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (!htab->buckets) goto free_htab; + htab->hashrnd = get_random_int(); for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); @@ -402,9 +405,9 @@ free_htab: return ERR_PTR(err); } -static inline u32 htab_map_hash(const void *key, u32 key_len) +static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) { - return jhash(key, key_len, 0); + return jhash(key, key_len, hashrnd); } static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) @@ -470,7 +473,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -597,7 +600,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) if (!key) goto find_first_elem; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -824,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -880,7 +883,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -945,7 +948,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -998,7 +1001,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1071,7 +1074,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1103,7 +1106,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; From 5ab522443bd1dafa9e32d6f4b029128efda072de Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:53 -0700 Subject: [PATCH 47/77] ice: Cleanup magic number Use define for the unit size shift of the Rx LAN context descriptor base address instead of the magic number 7. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d23a91665b46..068dbc740b76 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -265,6 +265,7 @@ enum ice_rx_flex_desc_status_error_0_bits { struct ice_rlan_ctx { u16 head; u16 cpuid; /* bigger than needed, see above for reason */ +#define ICE_RLAN_BASE_S 7 u64 base; u16 qlen; #define ICE_RLAN_CTX_DBUF_S 7 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 186e764a469a..7d65e0ed3588 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3983,7 +3983,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring) /* clear the context structure first */ memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; From f8ba7db850350319348b6d3c276f8ba19bc098ef Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Aug 2018 06:28:54 -0700 Subject: [PATCH 48/77] ice: Report stats for allocated queues via ethtool stats It is not safe to have the string table for statistics change order or size over the lifetime of a given netdevice. This is because of the nature of the 3-step process for obtaining stats. First, user space performs a request for the size of the strings table. Second it performs a separate request for the strings themselves, after allocating space for the table. Third, it requests the stats themselves, also allocating space for the table. If the size decreased, there is potential to see garbage data or stats values. In the worst case, we could potentially see stats values become mis-aligned with their strings, so that it looks like a statistic is being reported differently than it actually is. Even worse, if the size increased, there is potential that the strings table or stats table was not allocated large enough and the stats code could access and write to memory it should not, potentially resulting in undefined behavior and system crashes. It isn't even safe if the size always changes under the RTNL lock. This is because the calls take place over multiple user space commands, so it is not possible to hold the RTNL lock for the entire duration of obtaining strings and stats. Further, not all consumers of the ethtool API are the user space ethtool program, and it is possible that one assumes the strings will not change (valid under the current contract), and thus only requests the stats values when requesting stats in a loop. Finally, it's not possible in the general case to detect when the size changes, because it is quite possible that one value which could impact the stat size increased, while another decreased. This would result in the same total number of stats, but reordering them so that stats no longer line up with the strings they belong to. Since only size changes aren't enough, we would need some sort of hash or token to determine when the strings no longer match. This would require extending the ethtool stats commands, but there is no more space in the relevant structures. The real solution to resolve this would be to add a completely new API for stats, probably over netlink. In the ice driver, the only thing impacting the stats that is not constant is the number of queues. Instead of reporting stats for each used queue, report stats for each allocated queue. We do not change the number of queues allocated for a given netdevice, as we pass this into the alloc_etherdev_mq() function to set the num_tx_queues and num_rx_queues. This resolves the potential bugs at the slight cost of displaying many queue statistics which will not be activated. Signed-off-by: Jacob Keller Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 7 +++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 52 +++++++++++++++----- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d8b5fff581e7..ed071ea75f20 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -89,6 +89,13 @@ extern const char ice_drv_ver[]; #define ice_for_each_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) +/* Macros for each allocated tx/rx ring whether used or not in a VSI */ +#define ice_for_each_alloc_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++) + +#define ice_for_each_alloc_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) + struct ice_tc_info { u16 qoffset; u16 qcount; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 1db304c01d10..c71a9b528d6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -26,7 +26,7 @@ static int ice_q_stats_len(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); - return ((np->vsi->num_txq + np->vsi->num_rxq) * + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * (sizeof(struct ice_q_stats) / sizeof(u64))); } @@ -218,7 +218,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_txq(vsi, i) { + ice_for_each_alloc_txq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_packets", i); p += ETH_GSTRING_LEN; @@ -226,7 +226,7 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } - ice_for_each_rxq(vsi, i) { + ice_for_each_alloc_rxq(vsi, i) { snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_packets", i); p += ETH_GSTRING_LEN; @@ -253,6 +253,24 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_STATS: + /* The number (and order) of strings reported *must* remain + * constant for a given netdevice. This function must not + * report a different number based on run time parameters + * (such as the number of queues in use, or the setting of + * a private ethtool flag). This is due to the nature of the + * ethtool stats API. + * + * User space programs such as ethtool must make 3 separate + * ioctl requests, one for size, one for the strings, and + * finally one for the stats. Since these cross into + * user space, changes to the number or size could result in + * undefined memory access or incorrect string<->value + * correlations for statistics. + * + * Even if it appears to be safe, changes to the size or + * order of strings will suffer from race conditions and are + * not safe. + */ return ICE_ALL_STATS_LEN(netdev); default: return -EOPNOTSUPP; @@ -280,18 +298,26 @@ ice_get_ethtool_stats(struct net_device *netdev, /* populate per queue stats */ rcu_read_lock(); - ice_for_each_txq(vsi, j) { + ice_for_each_alloc_txq(vsi, j) { ring = READ_ONCE(vsi->tx_rings[j]); - if (!ring) - continue; - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } - ice_for_each_rxq(vsi, j) { + ice_for_each_alloc_rxq(vsi, j) { ring = READ_ONCE(vsi->rx_rings[j]); - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + if (ring) { + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } else { + data[i++] = 0; + data[i++] = 0; + } } rcu_read_unlock(); @@ -519,7 +545,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) goto done; } - for (i = 0; i < vsi->num_txq; i++) { + for (i = 0; i < vsi->alloc_txq; i++) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@ -551,7 +577,7 @@ process_rx: goto done; } - for (i = 0; i < vsi->num_rxq; i++) { + for (i = 0; i < vsi->alloc_rxq; i++) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; From b29bc220e2c7bd494a4605defcd93b18d5a8cf86 Mon Sep 17 00:00:00 2001 From: Preethi Banala Date: Thu, 9 Aug 2018 06:28:55 -0700 Subject: [PATCH 49/77] ice: Clean control queues only when they are initialized Clean control queues only when they are initialized. One of the ways to validate if the basic initialization is done is by checking value of cq->sq.head and cq->rq.head variables that specify the register address. This patch adds a check to avoid NULL pointer dereference crash when tried to shutdown uninitialized control queue. Signed-off-by: Preethi Banala Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 7c511f144ed6..c064416080e7 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -597,10 +597,14 @@ static enum ice_status ice_init_check_adminq(struct ice_hw *hw) return 0; init_ctrlq_free_rq: - ice_shutdown_rq(hw, cq); - ice_shutdown_sq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } return status; } @@ -706,10 +710,14 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) return; } - ice_shutdown_sq(hw, cq); - ice_shutdown_rq(hw, cq); - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); + if (cq->sq.head) { + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + } + if (cq->rq.head) { + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->rq_lock); + } } /** From 3d6b640efcc1b07709b42dd2e9609401c6f88633 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:28:56 -0700 Subject: [PATCH 50/77] ice: Fix bugs in control queue processing This patch is a consolidation of multiple bug fixes for control queue processing. 1) In ice_clean_adminq_subtask() remove unnecessary reads/writes to registers. The bits PFINT_FW_CTL, PFINT_MBX_CTL and PFINT_SB_CTL are not set when an interrupt arrives, which means that clearing them again can be omitted. 2) Get an accurate value in "pending" by re-reading the control queue head register from the hardware. 3) Fix a corner case involving lost control queue messages by checking for new control messages (using ice_ctrlq_pending) before exiting the cleanup routine. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.c | 5 +++- drivers/net/ethernet/intel/ice/ice_main.c | 26 ++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index c064416080e7..62be72fdc8f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1065,8 +1065,11 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, clean_rq_elem_out: /* Set pending if needed, unlock and return */ - if (pending) + if (pending) { + /* re-read HW head to calculate actual pending messages */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); + } clean_rq_elem_err: mutex_unlock(&cq->rq_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7d65e0ed3588..f3ba4f76b6cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -916,6 +916,21 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) return pending && (i == ICE_DFLT_IRQ_WORK); } +/** + * ice_ctrlq_pending - check if there is a difference between ntc and ntu + * @hw: pointer to hardware info + * @cq: control queue information + * + * returns true if there are pending messages in a queue, false if there aren't + */ +static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u16 ntu; + + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + return cq->rq.next_to_clean != ntu; +} + /** * ice_clean_adminq_subtask - clean the AdminQ rings * @pf: board private structure @@ -923,7 +938,6 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - u32 val; if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) return; @@ -933,9 +947,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - /* re-enable Admin queue interrupt causes */ - val = rd32(hw, PFINT_FW_CTL); - wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M)); + /* There might be a situation where new messages arrive to a control + * queue between processing the last message and clearing the + * EVENT_PENDING bit. So before exiting, check queue head again (using + * ice_ctrlq_pending) and process new messages if any. + */ + if (ice_ctrlq_pending(hw, &hw->adminq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN); ice_flush(hw); } From 1eb43fc754485d772b1165118a8fb80c385f0492 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Aug 2018 06:28:57 -0700 Subject: [PATCH 51/77] ice: Use order_base_2 to calculate higher power of 2 Currently, we use a combination of ilog2 and is_power_of_2() to calculate the next power of 2 for the qcount. This appears to be causing a warning on some combinations of GCC and the Linux kernel: MODPOST 1 modules WARNING: "____ilog2_NaN" [ice.ko] undefined! This appears to because because GCC realizes that qcount could be zero in some circumstances and thus attempts to link against the intentionally undefined ___ilog2_NaN function. The order_base_2 function is intentionally defined to return 0 when passed 0 as an argument, and thus will be safe to use here. This not only fixes the warning but makes the resulting code slightly cleaner, and is really what we should have used originally. Also update the comment to make it more clear that we are rounding up, not just incrementing the ilog2 of qcount unconditionally. Signed-off-by: Jacob Keller Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f3ba4f76b6cb..3eff1d2d1543 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1313,11 +1313,8 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) qcount = numq_tc; } - /* find higher power-of-2 of qcount */ - pow = ilog2(qcount); - - if (!is_power_of_2(qcount)) - pow++; + /* find the (rounded up) power-of-2 of qcount */ + pow = order_base_2(qcount); for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { From 5d8778d803e21f235e9bc727b5bd619f02abb88b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 9 Aug 2018 06:28:58 -0700 Subject: [PATCH 52/77] ice: Set VLAN flags correctly In the struct ice_aqc_vsi_props the field port_vlan_flags is an overloaded term because it is used for both port VLANs (PVLANs) and regular VLANs. This is an issue and is very confusing especially when dealing with VFs because normal VLANs and port VLANs are not the same. To fix this the field was renamed to vlan_flags and all of the #define's labeled *_PVLAN_* were renamed to *_VLAN_* if they are not specific to port VLANs. Also in ice_vsi_manage_vlan_stripping, set the ICE_AQ_VSI_VLAN_MODE_ALL bit to allow the driver to add a VLAN tag to all packets it sends. Signed-off-by: Brett Creeley Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 24 +++++++------- drivers/net/ethernet/intel/ice/ice_main.c | 31 +++++++++++-------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 6d3e11659ba5..a0614f472658 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -329,19 +329,19 @@ struct ice_aqc_vsi_props { /* VLAN section */ __le16 pvid; /* VLANS include priority bits */ u8 pvlan_reserved[2]; - u8 port_vlan_flags; -#define ICE_AQ_VSI_PVLAN_MODE_S 0 -#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S) -#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1 -#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2 -#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3 + u8 vlan_flags; +#define ICE_AQ_VSI_VLAN_MODE_S 0 +#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S) +#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1 +#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2 +#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3 #define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) -#define ICE_AQ_VSI_PVLAN_EMOD_S 3 -#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S) -#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_S 3 +#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S) +#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) u8 pvlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3eff1d2d1543..68003fad33d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1367,13 +1367,15 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* By default bits 3 and 4 in port_vlan_flags are 0's which results in - * legacy behavior (show VLAN, DEI, and UP) in descriptor. Also, allow - * all packets untagged/tagged. + + /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy + * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all + * packets untagged/tagged. */ - ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & - ICE_AQ_VSI_PVLAN_MODE_M) >> - ICE_AQ_VSI_PVLAN_MODE_S); + ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL & + ICE_AQ_VSI_VLAN_MODE_M) >> + ICE_AQ_VSI_VLAN_MODE_S); + /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -3732,10 +3734,10 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) enum ice_status status; /* Here we are configuring the VSI to let the driver add VLAN tags by - * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN - * tag insertion happens in the Tx hot path, in ice_tx_map. + * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag + * insertion happens in the Tx hot path, in ice_tx_map. */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3747,7 +3749,7 @@ static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } @@ -3769,12 +3771,15 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) */ if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH; } else { /* Disable stripping. Leave tag in packet */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING; + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; } + /* Allow all packets untagged/tagged */ + ctxt.info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); ctxt.vsi_num = vsi->vsi_num; @@ -3785,7 +3790,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) return -EIO; } - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + vsi->info.vlan_flags = ctxt.info.vlan_flags; return 0; } From 785e76d7a2051a9e28b9134d5388a45b16f5eb72 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 23 Aug 2018 17:46:25 +0100 Subject: [PATCH 53/77] tools: bpftool: return from do_event_pipe() on bad arguments When command line parsing fails in the while loop in do_event_pipe() because the number of arguments is incorrect or because the keyword is unknown, an error message is displayed, but bpftool remains stuck in the loop. Make sure we exit the loop upon failure. Fixes: f412eed9dfde ("tools: bpftool: add simple perf event output reader") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map_perf_ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 1832100d1b27..6d41323be291 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -194,8 +194,10 @@ int do_event_pipe(int argc, char **argv) } while (argc) { - if (argc < 2) + if (argc < 2) { BAD_ARG(); + goto err_close_map; + } if (is_prefix(*argv, "cpu")) { char *endptr; @@ -221,6 +223,7 @@ int do_event_pipe(int argc, char **argv) NEXT_ARG(); } else { BAD_ARG(); + goto err_close_map; } do_all = false; From 3bcd7fa37f33cda8c5639a908e9eb42d856e5d8b Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:28:59 -0700 Subject: [PATCH 54/77] ice: Update to interrupts enabled in OICR Remove the following interrupt causes that are not applicable or not handled: - PFINT_OICR_HLP_RDY_M - PFINT_OICR_CPM_RDY_M - PFINT_OICR_GPIO_M - PFINT_OICR_STORM_DETECT_M Add the following interrupt cause that's actually handled in ice_misc_intr: - PFINT_OICR_PE_CRITERR_M Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 8 -------- drivers/net/ethernet/intel/ice/ice_main.c | 9 +++------ 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 499904874b3f..6076fc87df9d 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -121,10 +121,6 @@ #define PFINT_FW_CTL_CAUSE_ENA_S 30 #define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) #define PFINT_OICR 0x0016CA00 -#define PFINT_OICR_HLP_RDY_S 14 -#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) -#define PFINT_OICR_CPM_RDY_S 15 -#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S) #define PFINT_OICR_ECC_ERR_S 16 #define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S) #define PFINT_OICR_MAL_DETECT_S 19 @@ -133,10 +129,6 @@ #define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S) #define PFINT_OICR_PCI_EXCEPTION_S 21 #define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S) -#define PFINT_OICR_GPIO_S 22 -#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S) -#define PFINT_OICR_STORM_DETECT_S 24 -#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S) #define PFINT_OICR_HMC_ERR_S 26 #define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S) #define PFINT_OICR_PE_CRITERR_S 28 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 68003fad33d1..34be94a30a60 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1704,15 +1704,12 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ rd32(hw, PFINT_OICR); /* read to clear */ - val = (PFINT_OICR_HLP_RDY_M | - PFINT_OICR_CPM_RDY_M | - PFINT_OICR_ECC_ERR_M | + val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | - PFINT_OICR_GPIO_M | - PFINT_OICR_STORM_DETECT_M | - PFINT_OICR_HMC_ERR_M); + PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); From c7f2c42b80ed6009f44e355aefc1e40db9485a9d Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:29:00 -0700 Subject: [PATCH 55/77] ice: Fix a few null pointer dereference issues 1) When ice_ena_msix_range() fails to reserve vectors, a devm_kfree() warning was seen in the error flow path. So check pf->irq_tracker before use in ice_clear_interrupt_scheme(). 2) In ice_vsi_cfg(), check vsi->netdev before use. 3) In ice_get_link_status, check link_up before use. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index d5300b606d5a..ebd701ac9428 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1483,7 +1483,7 @@ enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up) struct ice_phy_info *phy_info; enum ice_status status = 0; - if (!pi) + if (!pi || !link_up) return ICE_ERR_PARAM; phy_info = &pi->phy; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 34be94a30a60..d5d83c8848f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3257,8 +3257,10 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) ice_dis_msix(pf); - devm_kfree(&pf->pdev->dev, pf->irq_tracker); - pf->irq_tracker = NULL; + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; + } } /** @@ -4112,11 +4114,12 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; - ice_set_rx_mode(vsi->netdev); - - err = ice_restore_vlan(vsi); - if (err) - return err; + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); + if (err) + return err; + } err = ice_vsi_cfg_txqs(vsi); if (!err) From dab0588fb616c1774bbf108eab1749dda4ac6942 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 9 Aug 2018 06:29:01 -0700 Subject: [PATCH 56/77] ice: Fix potential return of uninitialized value In ice_vsi_setup_[tx|rx]_rings, err is uninitialized which can result in a garbage value return to the caller. Fix that. Signed-off-by: Jesse Brandeburg Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d5d83c8848f8..e23156515186 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4885,7 +4885,7 @@ int ice_down(struct ice_vsi *vsi) */ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_txq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", @@ -4910,7 +4910,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) */ static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_rxq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", From 43f8b22450f0a721915f1d2c7e3db6dd9573e76b Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Thu, 9 Aug 2018 06:29:02 -0700 Subject: [PATCH 57/77] ice: Change struct members from bool to u8 Recent versions of checkpatch have a new warning based on a documented preference of Linus to not use bool in structures due to wasted space and the size of bool is implementation dependent. For more information, see the email thread at https://lkml.org/lkml/2017/11/21/384. Signed-off-by: Bruce Allan Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 8 ++++---- drivers/net/ethernet/intel/ice/ice_switch.h | 6 +++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 2 +- drivers/net/ethernet/intel/ice/ice_type.h | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index ed071ea75f20..868f4a1d0f72 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -196,9 +196,9 @@ struct ice_vsi { struct list_head tmp_sync_list; /* MAC filters to be synced */ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ - bool irqs_ready; - bool current_isup; /* Sync 'link up' logging */ - bool stat_offsets_loaded; + u8 irqs_ready; + u8 current_isup; /* Sync 'link up' logging */ + u8 stat_offsets_loaded; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -269,7 +269,7 @@ struct ice_pf { struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; - bool stat_prev_loaded; /* has previous stats been loaded */ + u8 stat_prev_loaded; /* has previous stats been loaded */ char int_name[ICE_INT_NAME_STR_LEN]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 6f4a0d159dbf..9b8ec128ee31 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -17,7 +17,7 @@ struct ice_vsi_ctx { u16 vsis_unallocated; u16 flags; struct ice_aqc_vsi_props info; - bool alloc_from_pool; + u8 alloc_from_pool; }; enum ice_sw_fwd_act_type { @@ -94,8 +94,8 @@ struct ice_fltr_info { u8 qgrp_size; /* Rule creations populate these indicators basing on the switch type */ - bool lb_en; /* Indicate if packet can be looped back */ - bool lan_en; /* Indicate if packet can be forwarded to the uplink */ + u8 lb_en; /* Indicate if packet can be looped back */ + u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; /* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 567067b650c4..31bc998fe200 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -143,7 +143,7 @@ struct ice_ring { u16 next_to_use; u16 next_to_clean; - bool ring_active; /* is ring online or not */ + u8 ring_active; /* is ring online or not */ /* stats structs */ struct ice_q_stats stats; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 99c8a9a71b5e..97c366e0ca59 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -83,7 +83,7 @@ struct ice_link_status { u64 phy_type_low; u16 max_frame_size; u16 link_speed; - bool lse_ena; /* Link Status Event notification */ + u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; u8 ext_info; @@ -101,7 +101,7 @@ struct ice_phy_info { struct ice_link_status link_info_old; u64 phy_type_low; enum ice_media_type media_type; - bool get_link_info; + u8 get_link_info; }; /* Common HW capabilities for SW use */ @@ -167,7 +167,7 @@ struct ice_nvm_info { u32 oem_ver; /* OEM version info */ u16 sr_words; /* Shadow RAM size in words */ u16 ver; /* NVM package version */ - bool blank_nvm_mode; /* is NVM empty (no FW present) */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; /* Max number of port to queue branches w.r.t topology */ @@ -181,7 +181,7 @@ struct ice_sched_node { struct ice_aqc_txsched_elem_data info; u32 agg_id; /* aggregator group id */ u16 vsi_id; - bool in_use; /* suspended or in use */ + u8 in_use; /* suspended or in use */ u8 tx_sched_layer; /* Logical Layer (1-9) */ u8 num_children; u8 tc_num; @@ -218,7 +218,7 @@ struct ice_sched_vsi_info { struct ice_sched_tx_policy { u16 max_num_vsis; u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS]; - bool rdma_ena; + u8 rdma_ena; }; struct ice_port_info { @@ -243,7 +243,7 @@ struct ice_port_info { struct list_head agg_list; /* lists all aggregator */ u8 lport; #define ICE_LPORT_MASK 0xff - bool is_vf; + u8 is_vf; }; struct ice_switch_info { @@ -287,7 +287,7 @@ struct ice_hw { u8 max_cgds; u8 sw_entry_point_layer; - bool evb_veb; /* true for VEB, false for VEPA */ + u8 evb_veb; /* true for VEB, false for VEPA */ struct ice_bus_info bus; struct ice_nvm_info nvm; struct ice_hw_dev_caps dev_caps; /* device capabilities */ @@ -318,7 +318,7 @@ struct ice_hw { u8 itr_gran_100; u8 itr_gran_50; u8 itr_gran_25; - bool ucast_shared; /* true if VSIs can share unicast addr */ + u8 ucast_shared; /* true if VSIs can share unicast addr */ }; From 3968540ba61e9a19a0c4bda733db70952708d264 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 9 Aug 2018 06:29:03 -0700 Subject: [PATCH 58/77] ice: Trivial formatting fixes 1) Add missing "\n" when printing link event error message. 2) Update dev_err statement in probe. 3) Add function description for ice_clear_pf_cfg. 4) Fix coding style for ice_acquire_nvm. 5) netdev->mtu is unsigned so use %u. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 3 +++ drivers/net/ethernet/intel/ice/ice_main.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_nvm.c | 5 ++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index ebd701ac9428..661beea6af79 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -45,6 +45,9 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) /** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure + * + * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port + * configuration, flow director filters, etc.). */ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e23156515186..f1e80eed2fd6 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -901,7 +901,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf)) dev_err(&pf->pdev->dev, - "Could not handle link event"); + "Could not handle link event\n"); break; default: dev_dbg(&pf->pdev->dev, @@ -3284,7 +3284,7 @@ static int ice_probe(struct pci_dev *pdev, err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); if (err) { - dev_err(&pdev->dev, "I/O map error %d\n", err); + dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); return err; } @@ -5252,7 +5252,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %d\n", netdev->mtu); + netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 92da0a626ce0..295a8cd87fc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -131,9 +131,8 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) * * This function will request NVM ownership. */ -static enum -ice_status ice_acquire_nvm(struct ice_hw *hw, - enum ice_aq_res_access_type access) +static enum ice_status +ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) { if (hw->nvm.blank_nvm_mode) return 0; From a9910c0886470b659a6c3629d6467d5639c327e9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 19 Jul 2018 22:04:07 +0800 Subject: [PATCH 59/77] ixgb: use dma_zalloc_coherent instead of allocator/memset Use dma_zalloc_coherent instead of dma_alloc_coherent followed by memset 0. Signed-off-by: YueHaibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 43664adf7a3c..d3e72d0f66ef 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -771,14 +771,13 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter) rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc); rxdr->size = ALIGN(rxdr->size, 4096); - rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, - GFP_KERNEL); + rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, + GFP_KERNEL); if (!rxdr->desc) { vfree(rxdr->buffer_info); return -ENOMEM; } - memset(rxdr->desc, 0, rxdr->size); rxdr->next_to_clean = 0; rxdr->next_to_use = 0; From cf1acec008f8d7761aa3fd7c4bca7e17b2d2512d Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:29 -0700 Subject: [PATCH 60/77] e1000: check on netif_running() before calling e1000_up() When the device is not up, the call to 'e1000_up()' from the error handling path of 'e1000_set_ringparam()' causes a kernel oops with a null-pointer dereference. The null-pointer dereference is triggered in function 'e1000_alloc_rx_buffers()' at line 'buffer_info = &rx_ring->buffer_info[i]'. This bug was reported by COD, a tool for testing kernel module binaries I am building. This bug was also detected by KFI from Dr. Kai Cong. This patch fixes the bug by checking on 'netif_running()' before calling 'e1000_up()' in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index bdb3f8e65ed4..c1e4e94f100f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -644,7 +644,8 @@ err_setup_rx: err_alloc_rx: kfree(txdr); err_alloc_tx: - e1000_up(adapter); + if (netif_running(adapter->netdev)) + e1000_up(adapter); err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; From ee400a3f1bfe7004a3e14b81c38ccc5583c26295 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:30 -0700 Subject: [PATCH 61/77] e1000: ensure to free old tx/rx rings in set_ringparam() In 'e1000_set_ringparam()', the tx_ring and rx_ring are updated with new value and the old tx/rx rings are freed only when the device is up. There are resource leaks on old tx/rx rings when the device is not up. This bug is reported by COD, a tool for testing kernel module binaries I am building. This patch fixes the bug by always calling 'kfree()' on old tx/rx rings in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index c1e4e94f100f..2569a168334c 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -624,14 +624,14 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_old; e1000_free_all_rx_resources(adapter); e1000_free_all_tx_resources(adapter); - kfree(tx_old); - kfree(rx_old); adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); if (err) goto err_setup; } + kfree(tx_old); + kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); return 0; From a798fbac33c4cbfe7d539298623b7af6c3f9525a Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Thu, 26 Jul 2018 10:20:38 -0700 Subject: [PATCH 62/77] igb: Use an advanced ctx descriptor for launchtime On i210, Launchtime (TxTime) requires the usage of an "Advanced Transmit Context Descriptor" for retrieving the timestamp of a packet. The igb driver correctly builds such descriptor on the segmentation flow (i.e. igb_tso()) or on the checksum one (i.e. igb_tx_csum()), but the feature is broken for AF_PACKET if the IGB_TX_FLAGS_VLAN is not set, which happens due to an early return on igb_tx_csum(). This flag is only set by the kernel when a VLAN interface is used, thus we can't just rely on it. Here we are fixing this issue by checking if launchtime is enabled for the current tx_ring before performing the early return. Signed-off-by: Jesus Sanchez-Palencia Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d03c2f0d7592..74416f8a9446 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5816,7 +5816,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_failed: - if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) && + !tx_ring->launchtime_enable) return; goto no_csum; } From 151356270b0761e455ed82bba3353fb494451555 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:04:53 +0800 Subject: [PATCH 63/77] igb: Replace GFP_ATOMIC with GFP_KERNEL in igb_sw_init() igb_sw_init() is never called in atomic context. It calls kzalloc() and kcalloc() with GFP_ATOMIC, which is not necessary. GFP_ATOMIC can be replaced with GFP_KERNEL. This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 74416f8a9446..a32c576c1e65 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3873,7 +3873,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->mac_table = kcalloc(hw->mac.rar_entry_count, sizeof(struct igb_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; @@ -3883,7 +3883,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->shadow_vfta) return -ENOMEM; From 69a64658de502c8ca383fe2c5a5208f00b5844cd Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:07:38 +0800 Subject: [PATCH 64/77] igb: Replace mdelay() with msleep() in igb_integrated_phy_loopback() igb_integrated_phy_loopback() is never called in atomic context. It calls mdelay() to busily wait, which is not necessary. mdelay() can be replaced with msleep(). This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f92f7918112d..5acf3b743876 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1649,7 +1649,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) if (hw->phy.type == e1000_phy_m88) igb_phy_disable_receiver(adapter); - mdelay(500); + msleep(500); return 0; } From 374f78f75be98c72876a4d0311b278cd226effba Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 27 Jul 2018 16:22:31 +0800 Subject: [PATCH 65/77] ixgbe: Replace GFP_ATOMIC with GFP_KERNEL ixgbe_fcoe_ddp_setup(), ixgbe_setup_fcoe_ddp_resources() and ixgbe_sw_init() are never called in atomic context. They call kmalloc(), dma_pool_alloc() and kzalloc() with GFP_ATOMIC, which is not necessary. GFP_ATOMIC can be replaced with GFP_KERNEL. This is found by a static analysis tool named DCNS written by myself. Signed-off-by: Jia-Ju Bai Acked-by: Sebastian Basierski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 94b3165ff543..ccd852ad62a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -192,7 +192,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, } /* alloc the udl from per cpu ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); + ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap; @@ -760,7 +760,7 @@ int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter) return 0; /* Extra buffer to be shared by all DDPs for HW work around */ - buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC); + buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index af4c9ae7f432..663d59ba527a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6201,7 +6201,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, adapter->mac_table = kcalloc(hw->mac.num_rar_entries, sizeof(struct ixgbe_mac_addr), - GFP_ATOMIC); + GFP_KERNEL); if (!adapter->mac_table) return -ENOMEM; From fabf1bce103aa8e3db27ff2cc55f8e0fb0abcc30 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 30 Jul 2018 15:52:48 -0700 Subject: [PATCH 66/77] ixgbe: Prevent unsupported configurations with XDP These changes address comments by Jakub Kicinski on commit 38b7e7f8ae82 ("ixgbe: Do not allow LRO or MTU change with XDP"). Change the MTU check with XDP to allow any supported value and only reject those outside of the range as opposed to rejecting any change when XDP is active. In situations where MTU size is not supported, return -EINVAL instead of -EPERM. Add checks when enabling SRIOV, DCB, or adding L2FW offloaded device as they are not supported with XDP. CC: Jakub Kicinski Signed-off-by: Tony Nguyen Acked-by: Jakub Kicinski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 +++++++++++++++++-- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 ++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 663d59ba527a..9a23d33a47ed 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6620,8 +6620,18 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbe_adapter *adapter = netdev_priv(netdev); if (adapter->xdp_prog) { - e_warn(probe, "MTU cannot be changed while XDP program is loaded\n"); - return -EPERM; + int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + if (new_frame_size > ixgbe_rx_bufsz(ring)) { + e_warn(probe, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; + } + } } /* @@ -8983,6 +8993,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) #ifdef CONFIG_IXGBE_DCB if (tc) { + if (adapter->xdp_prog) { + e_warn(probe, "DCB is not supported with XDP\n"); + + ixgbe_init_interrupt_scheme(adapter); + if (netif_running(dev)) + ixgbe_open(dev); + return -EINVAL; + } + netdev_set_num_tc(dev, tc); ixgbe_set_prio_tc_map(adapter); @@ -9934,6 +9953,11 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) int tcs = adapter->hw_tcs ? : 1; int pool, err; + if (adapter->xdp_prog) { + e_warn(probe, "L2FW offload is not supported with XDP\n"); + return ERR_PTR(-EINVAL); + } + /* The hardware supported by ixgbe only filters on the destination MAC * address. In order to avoid issues we only support offloading modes * where the hardware can actually provide the functionality. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 6f59933cdff7..9264a5f8a5d0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -53,6 +53,11 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, struct ixgbe_hw *hw = &adapter->hw; int i; + if (adapter->xdp_prog) { + e_warn(probe, "SRIOV is not supported with XDP\n"); + return -EINVAL; + } + /* Enable VMDq flag so device will be set in VM mode */ adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED | IXGBE_FLAG_VMDQ_ENABLED; From 939b701ad63314f5aa90dcd3d866f73954945209 Mon Sep 17 00:00:00 2001 From: Sebastian Basierski Date: Tue, 31 Jul 2018 18:16:00 +0200 Subject: [PATCH 67/77] ixgbe: fix driver behaviour after issuing VFLR Since VFLR doesn't clear VFMBMEM (VF Mailbox Memory) and is not re-enabling queues correctly we should fix this behavior. Signed-off-by: Sebastian Basierski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 26 +++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 9264a5f8a5d0..3c6f01c41b78 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -693,8 +693,13 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; + u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask); u8 num_tcs = adapter->hw_tcs; + u32 reg_val; + u32 queue; + u32 word; /* remove VLAN filters beloning to this VF */ ixgbe_clear_vf_vlans(adapter, vf); @@ -731,6 +736,27 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) /* reset VF api back to unknown */ adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10; + + /* Restart each queue for given VF */ + for (queue = 0; queue < q_per_pool; queue++) { + unsigned int reg_idx = (vf * q_per_pool) + queue; + + reg_val = IXGBE_READ_REG(hw, IXGBE_PVFTXDCTL(reg_idx)); + + /* Re-enabling only configured queues */ + if (reg_val) { + reg_val |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + reg_val &= ~IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val); + } + } + + /* Clear VF's mailbox memory */ + for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0); + + IXGBE_WRITE_FLUSH(hw); } static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 44cfb2021145..41bcbb337e83 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2518,6 +2518,7 @@ enum { /* Translated register #defines */ #define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) #define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) +#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) #define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) #define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) From fa38e30ac73fbb01d7e5d0fd1b12d412fa3ac3ee Mon Sep 17 00:00:00 2001 From: Martyna Szapar Date: Tue, 7 Aug 2018 17:11:23 -0700 Subject: [PATCH 68/77] i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled If interface is connected to switch port configured for DCB there are TX timeouts when bringing up interface. Problem started appearing after adding in i40e driver code mqprio hardware offload mode. In function i40e_vsi_configure_bw_alloc was added resetting BW rate which should be executing when mqprio qdisc is removed but was also when there was no mqprio qdisc added and DCB was enabled. In this patch was added additional check for DCB flag so now when DCB is enabled the correct DCB configs from before mqprio patch are restored. Signed-off-by: Martyna Szapar Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f2c622e78802..ac685ad4d877 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5122,15 +5122,17 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + struct i40e_pf *pf = vsi->back; i40e_status ret; int i; - if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) + /* There is no need to reset BW when mqprio mode is on. */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - if (!vsi->mqprio_qopt.qopt.hw) { + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "Failed to reset tx rate for vsi->seid %u\n", vsi->seid); return ret; @@ -5139,12 +5141,11 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; - ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data, - NULL); + ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "AQ command Config VSI BW allocation per TC failed = %d\n", - vsi->back->hw.aq.asq_last_status); + pf->hw.aq.asq_last_status); return -EINVAL; } From 07f3701387dcab3a4fb0166098fb2754a1b927e1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 20 Aug 2018 08:12:27 -0700 Subject: [PATCH 69/77] i40e: fix condition of WARN_ONCE for stat strings Commit 9b10df596bd4 ("i40e: use WARN_ONCE to replace the commented BUG_ON size check") introduced a warning check to make sure that the size of the stat strings was always the expected value. This code accidentally inverted the check of the data pointer. Fix this so that we accurately count the size of the stats we copied in. This fixes an erroneous WARN kernel splat that occurs when requesting ethtool statistics. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Tested-by: Mauro S M Rodrigues Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index abcd096ede14..5ff6caa83948 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2013,7 +2013,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data) for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i); - WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, + WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN, "stat strings count mismatch!"); } From 602b74eda81311dbdb5dbab08c30f789f648ebdc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 24 Aug 2018 15:41:35 +0300 Subject: [PATCH 70/77] mlxsw: spectrum_switchdev: Do not leak RIFs when removing bridge When a bridge device is removed, the VLANs are flushed from each configured port. This causes the ports to decrement the reference count on the associated FIDs (filtering identifier). If the reference count of a FID is 1 and it has a RIF (router interface), then this RIF is destroyed. However, if no port is member in the VLAN for which a RIF exists, then the RIF will continue to exist after the removal of the bridge. To reproduce: # ip link add name br0 type bridge vlan_filtering 1 # ip link set dev swp1 master br0 # ip link add link br0 name br0.10 type vlan id 10 # ip address add 192.0.2.0/24 dev br0.10 # ip link del dev br0 The RIF associated with br0.10 continues to exist. Fix this by iterating over all the bridge device uppers when it is destroyed and take care of destroying their RIFs. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++++++ .../mellanox/mlxsw/spectrum_switchdev.c | 20 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3ae930196741..3cdb7aca90b7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -414,6 +414,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3a96307f51b0..2ab9cf25a08a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6234,6 +6234,17 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_vr_put(mlxsw_sp, vr); } +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return; + mlxsw_sp_rif_destroy(rif); +} + static void mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0d8444aaba01..db715da7bab7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -127,6 +127,24 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); } +static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, + void *data) +{ + struct mlxsw_sp *mlxsw_sp = data; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + return 0; +} + +static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + netdev_walk_all_upper_dev_rcu(dev, + mlxsw_sp_bridge_device_upper_rif_destroy, + mlxsw_sp); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) @@ -165,6 +183,8 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, + bridge_device->dev); list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; From ab5f11055fdf8dfc3ddbd89e8e3cc550de41d1d3 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 21 Aug 2018 17:35:48 +0200 Subject: [PATCH 71/77] net: macb: Fix regression breaking non-MDIO fixed-link PHYs commit 739de9a1563a ("net: macb: Reorganize macb_mii bringup") broke initializing macb on the EVB-KSZ9477 eval board. There, of_mdiobus_register was called even for the fixed-link representing the RGMII-link to the switch with the result that the driver attempts to enumerate PHYs on a non-existent MDIO bus: libphy: MACB_mii_bus: probed mdio_bus f0028000.ethernet-ffffffff: fixed-link has invalid PHY address mdio_bus f0028000.ethernet-ffffffff: scan phy fixed-link at address 0 [snip] mdio_bus f0028000.ethernet-ffffffff: scan phy fixed-link at address 31 The "MDIO" bus registration succeeds regardless, having claimed the reset GPIO, and calling of_phy_register_fixed_link later on fails because it tries to claim the same GPIO: macb f0028000.ethernet: broken fixed-link specification Fix this by registering the fixed-link before calling mdiobus_register. Fixes: 739de9a1563a ("net: macb: Reorganize macb_mii bringup") Signed-off-by: Ahmad Fatoum Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 27 +++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dc09f9a8a49b..f46b854d34b5 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -482,11 +482,6 @@ static int macb_mii_probe(struct net_device *dev) if (np) { if (of_phy_is_fixed_link(np)) { - if (of_phy_register_fixed_link(np) < 0) { - dev_err(&bp->pdev->dev, - "broken fixed-link specification\n"); - return -ENODEV; - } bp->phy_node = of_node_get(np); } else { bp->phy_node = of_parse_phandle(np, "phy-handle", 0); @@ -569,7 +564,7 @@ static int macb_mii_init(struct macb *bp) { struct macb_platform_data *pdata; struct device_node *np; - int err; + int err = -ENXIO; /* Enable management port */ macb_writel(bp, NCR, MACB_BIT(MPE)); @@ -592,12 +587,23 @@ static int macb_mii_init(struct macb *bp) dev_set_drvdata(&bp->dev->dev, bp->mii_bus); np = bp->pdev->dev.of_node; - if (pdata) - bp->mii_bus->phy_mask = pdata->phy_mask; + if (np && of_phy_is_fixed_link(np)) { + if (of_phy_register_fixed_link(np) < 0) { + dev_err(&bp->pdev->dev, + "broken fixed-link specification %pOF\n", np); + goto err_out_free_mdiobus; + } + + err = mdiobus_register(bp->mii_bus); + } else { + if (pdata) + bp->mii_bus->phy_mask = pdata->phy_mask; + + err = of_mdiobus_register(bp->mii_bus, np); + } - err = of_mdiobus_register(bp->mii_bus, np); if (err) - goto err_out_free_mdiobus; + goto err_out_free_fixed_link; err = macb_mii_probe(bp->dev); if (err) @@ -607,6 +613,7 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); +err_out_free_fixed_link: if (np && of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); err_out_free_mdiobus: From f7b9e8e111e0ce04ed7d1a1cb5b01b6e57775708 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2018 15:27:23 +0200 Subject: [PATCH 72/77] Revert "net: stmmac: fix build failure due to missing COMMON_CLK dependency" This reverts commit bde4975310eb1982bd0bbff673989052d92fd481. All legacy clock implementations now implement clk_set_rate() (Some implementations may be dummies, though). Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index edf20361ea5f..bf4acebb6bcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -33,7 +33,7 @@ config DWMAC_DWC_QOS_ETH select PHYLIB select CRC32 select MII - depends on OF && COMMON_CLK && HAS_DMA + depends on OF && HAS_DMA help Support for chips using the snps,dwc-qos-ethernet.txt DT binding. @@ -57,7 +57,7 @@ config DWMAC_ANARION config DWMAC_IPQ806X tristate "QCA IPQ806x DWMAC support" default ARCH_QCOM - depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) + depends on OF && (ARCH_QCOM || COMPILE_TEST) select MFD_SYSCON help Support for QCA IPQ806X DWMAC Ethernet. @@ -100,7 +100,7 @@ config DWMAC_OXNAS config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP - depends on OF && COMMON_CLK && (ARCH_ROCKCHIP || COMPILE_TEST) + depends on OF && (ARCH_ROCKCHIP || COMPILE_TEST) select MFD_SYSCON help Support for Ethernet controller on Rockchip RK3288 SoC. @@ -123,7 +123,7 @@ config DWMAC_SOCFPGA config DWMAC_STI tristate "STi GMAC support" default ARCH_STI - depends on OF && COMMON_CLK && (ARCH_STI || COMPILE_TEST) + depends on OF && (ARCH_STI || COMPILE_TEST) select MFD_SYSCON ---help--- Support for ethernet controller on STi SOCs. @@ -147,7 +147,7 @@ config DWMAC_STM32 config DWMAC_SUNXI tristate "Allwinner GMAC support" default ARCH_SUNXI - depends on OF && COMMON_CLK && (ARCH_SUNXI || COMPILE_TEST) + depends on OF && (ARCH_SUNXI || COMPILE_TEST) ---help--- Support for Allwinner A20/A31 GMAC ethernet controllers. From 0da70f808029476001109b6cb076737bc04cea2e Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 23 Aug 2018 10:45:22 +0300 Subject: [PATCH 73/77] net: macb: do not disable MDIO bus at open/close time macb_reset_hw() is called from macb_close() and indirectly from macb_open(). macb_reset_hw() zeroes the NCR register, including the MPE (Management Port Enable) bit. This will prevent accessing any other PHYs for other Ethernet MACs on the MDIO bus, which remains registered at macb_reset_hw() time, until macb_init_hw() is called from macb_open() which sets the MPE bit again. I.e. currently the MDIO bus has a short disruption at open time and is disabled at close time until the interface is opened again. Fix that by only touching the RE and TE bits when enabling and disabling RX/TX. v2: Make macb_init_hw() NCR write a single statement. Fixes: 6c36a7074436 ("macb: Use generic PHY layer") Signed-off-by: Anssi Hannula Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f46b854d34b5..c6707ea2d751 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2035,14 +2035,17 @@ static void macb_reset_hw(struct macb *bp) { struct macb_queue *queue; unsigned int q; + u32 ctrl = macb_readl(bp, NCR); /* Disable RX and TX (XXX: Should we halt the transmission * more gracefully?) */ - macb_writel(bp, NCR, 0); + ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE)); /* Clear the stats registers (XXX: Update stats first?) */ - macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); + ctrl |= MACB_BIT(CLRSTAT); + + macb_writel(bp, NCR, ctrl); /* Clear all status flags */ macb_writel(bp, TSR, -1); @@ -2230,7 +2233,7 @@ static void macb_init_hw(struct macb *bp) } /* Enable TX and RX */ - macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); } /* The hash address register is 64 bits long and takes up two From 6750c87074c5b534d82fdaabb1deb45b8f1f57de Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 23 Aug 2018 13:20:52 -0700 Subject: [PATCH 74/77] qlge: Fix netdev features configuration. qlge_fix_features() is not supposed to modify hardware or driver state, rather it is supposed to only fix requested fetures bits. Currently qlge_fix_features() also goes for interface down and up unnecessarily if there is not even any change in features set. This patch changes/fixes following - 1) Move reload of interface or device re-config from qlge_fix_features() to qlge_set_features(). 2) Reload of interface in qlge_set_features() only if relevant feature bit (NETIF_F_HW_VLAN_CTAG_RX) is changed. 3) Get rid of qlge_fix_features() since driver is not really required to fix any features bit. Signed-off-by: Manish Reviewed-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 23 +++++++------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 353f1c129af1..059ba9429e51 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2384,26 +2384,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev, return status; } -static netdev_features_t qlge_fix_features(struct net_device *ndev, - netdev_features_t features) -{ - int err; - - /* Update the behavior of vlan accel in the adapter */ - err = qlge_update_hw_vlan_features(ndev, features); - if (err) - return err; - - return features; -} - static int qlge_set_features(struct net_device *ndev, netdev_features_t features) { netdev_features_t changed = ndev->features ^ features; + int err; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + /* Update the behavior of vlan accel in the adapter */ + err = qlge_update_hw_vlan_features(ndev, features); + if (err) + return err; - if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); + } return 0; } @@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_netdev_ops = { .ndo_set_mac_address = qlge_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = qlge_tx_timeout, - .ndo_fix_features = qlge_fix_features, .ndo_set_features = qlge_set_features, .ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid, From 2d66f997f0545c8f7fc5cf0b49af1decb35170e7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 24 Aug 2018 16:53:13 +0800 Subject: [PATCH 75/77] vhost: correctly check the iova range when waking virtqueue We don't wakeup the virtqueue if the first byte of pending iova range is the last byte of the range we just got updated. This will lead a virtqueue to wait for IOTLB updating forever. Fixing by correct the check and wake up the virtqueue in this case. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Peter Xu Signed-off-by: Jason Wang Reviewed-by: Peter Xu Tested-by: Peter Xu Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 96c1d8400822..b13c6b4b2c66 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -952,7 +952,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && - msg->iova + msg->size - 1 > vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); From e75d039a54090470b7a42e803fd4f9398390f907 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Aug 2018 12:18:30 +0100 Subject: [PATCH 76/77] qed: fix spelling mistake "comparsion" -> "comparison" Trivial fix to spelling mistake in DP_ERR error message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index d9ab5add27a8..34193c2f1699 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -407,7 +407,7 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, if (i == QED_INIT_MAX_POLL_COUNT) { DP_ERR(p_hwfn, - "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n", + "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparison %08x)]\n", addr, le32_to_cpu(cmd->expected_val), val, le32_to_cpu(cmd->op_data)); } From 98c8f125fd8a6240ea343c1aa50a1be9047791b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Aug 2018 22:58:01 -0700 Subject: [PATCH 77/77] net: sched: Fix memory exposure from short TCA_U32_SEL Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink policy, so max length isn't enforced, only minimum. This means nkeys (from userspace) was being trusted without checking the actual size of nla_len(), which could lead to a memory over-read, and ultimately an exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within a namespace. Reported-by: Al Viro Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5d2a6dc3921..f218ccf1e2d9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -914,6 +914,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid, flags = 0; + size_t sel_size; int err; #ifdef CONFIG_CLS_U32_PERF size_t size; @@ -1076,8 +1077,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } s = nla_data(tb[TCA_U32_SEL]); + sel_size = struct_size(s, keys, s->nkeys); + if (nla_len(tb[TCA_U32_SEL]) < sel_size) { + err = -EINVAL; + goto erridr; + } - n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); if (n == NULL) { err = -ENOBUFS; goto erridr; @@ -1092,7 +1098,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&n->sel, s, sel_size); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;