net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)

Add a VF driver for Microsoft Azure Network Adapter (MANA) that will be
available in the future.

Co-developed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Co-developed-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Dexuan Cui 2021-04-16 13:11:59 -07:00 committed by David S. Miller
parent 83c1ca257a
commit ca9c54d2d6
15 changed files with 6156 additions and 1 deletions

View File

@ -8267,11 +8267,12 @@ S: Maintained
T: git git://linuxtv.org/media_tree.git
F: drivers/media/i2c/hi556.c
Hyper-V CORE AND DRIVERS
Hyper-V/Azure CORE AND DRIVERS
M: "K. Y. Srinivasan" <kys@microsoft.com>
M: Haiyang Zhang <haiyangz@microsoft.com>
M: Stephen Hemminger <sthemmin@microsoft.com>
M: Wei Liu <wei.liu@kernel.org>
M: Dexuan Cui <decui@microsoft.com>
L: linux-hyperv@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
@ -8288,6 +8289,7 @@ F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/iommu/hyperv-iommu.c
F: drivers/net/ethernet/microsoft/
F: drivers/net/hyperv/
F: drivers/pci/controller/pci-hyperv-intf.c
F: drivers/pci/controller/pci-hyperv.c

View File

@ -82,6 +82,7 @@ source "drivers/net/ethernet/huawei/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
source "drivers/net/ethernet/microsoft/Kconfig"
source "drivers/net/ethernet/xscale/Kconfig"
config JME

View File

@ -45,6 +45,7 @@ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
obj-$(CONFIG_NET_VENDOR_MICROSOFT) += microsoft/
obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/
obj-$(CONFIG_JME) += jme.o
obj-$(CONFIG_KORINA) += korina.o

View File

@ -0,0 +1,29 @@
#
# Microsoft Azure network device configuration
#
config NET_VENDOR_MICROSOFT
bool "Microsoft Network Devices"
default y
help
If you have a network (Ethernet) device belonging to this class, say Y.
Note that the answer to this question doesn't directly affect the
kernel: saying N will just cause the configurator to skip the
question about Microsoft network devices. If you say Y, you will be
asked for your specific device in the following question.
if NET_VENDOR_MICROSOFT
config MICROSOFT_MANA
	tristate "Microsoft Azure Network Adapter (MANA) support"
	depends on PCI_MSI && X86_64
	# PCI_HYPERV is a user-visible symbol with dependencies of its own, so
	# it is expressed as a dependency rather than force-enabled via select.
	depends on PCI_HYPERV
	help
	  This driver supports Microsoft Azure Network Adapter (MANA).
	  So far, the driver is only supported on X86_64.

	  To compile this driver as a module, choose M here.
	  The module will be called mana.
endif #NET_VENDOR_MICROSOFT

View File

@ -0,0 +1,5 @@
#
# Makefile for the Microsoft Azure network device driver.
#
obj-$(CONFIG_MICROSOFT_MANA) += mana/

View File

@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Makefile for the Microsoft Azure Network Adapter driver
obj-$(CONFIG_MICROSOFT_MANA) += mana.o
mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o

View File

@ -0,0 +1,673 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Copyright (c) 2021, Microsoft Corporation. */
#ifndef _GDMA_H
#define _GDMA_H
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include "shm_channel.h"
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
* them are naturally aligned and hence don't need __packed.
*/
enum gdma_request_type {
GDMA_VERIFY_VF_DRIVER_VERSION = 1,
GDMA_QUERY_MAX_RESOURCES = 2,
GDMA_LIST_DEVICES = 3,
GDMA_REGISTER_DEVICE = 4,
GDMA_DEREGISTER_DEVICE = 5,
GDMA_GENERATE_TEST_EQE = 10,
GDMA_CREATE_QUEUE = 12,
GDMA_DISABLE_QUEUE = 13,
GDMA_CREATE_DMA_REGION = 25,
GDMA_DMA_REGION_ADD_PAGES = 26,
GDMA_DESTROY_DMA_REGION = 27,
};
enum gdma_queue_type {
GDMA_INVALID_QUEUE,
GDMA_SQ,
GDMA_RQ,
GDMA_CQ,
GDMA_EQ,
};
enum gdma_work_request_flags {
GDMA_WR_NONE = 0,
GDMA_WR_OOB_IN_SGL = BIT(0),
GDMA_WR_PAD_BY_SGE0 = BIT(1),
};
enum gdma_eqe_type {
GDMA_EQE_COMPLETION = 3,
GDMA_EQE_TEST_EVENT = 64,
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
};
enum {
GDMA_DEVICE_NONE = 0,
GDMA_DEVICE_HWC = 1,
GDMA_DEVICE_MANA = 2,
};
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
/* The bitmap size in bits. */
u32 size;
/* The bitmap tracks the resources. */
unsigned long *map;
};
union gdma_doorbell_entry {
u64 as_uint64;
struct {
u64 id : 24;
u64 reserved : 8;
u64 tail_ptr : 31;
u64 arm : 1;
} cq;
struct {
u64 id : 24;
u64 wqe_cnt : 8;
u64 tail_ptr : 32;
} rq;
struct {
u64 id : 24;
u64 reserved : 8;
u64 tail_ptr : 32;
} sq;
struct {
u64 id : 16;
u64 reserved : 16;
u64 tail_ptr : 31;
u64 arm : 1;
} eq;
}; /* HW DATA */
struct gdma_msg_hdr {
u32 hdr_type;
u32 msg_type;
u16 msg_version;
u16 hwc_msg_id;
u32 msg_size;
}; /* HW DATA */
struct gdma_dev_id {
union {
struct {
u16 type;
u16 instance;
};
u32 as_uint32;
};
}; /* HW DATA */
struct gdma_req_hdr {
struct gdma_msg_hdr req;
struct gdma_msg_hdr resp; /* The expected response */
struct gdma_dev_id dev_id;
u32 activity_id;
}; /* HW DATA */
struct gdma_resp_hdr {
struct gdma_msg_hdr response;
struct gdma_dev_id dev_id;
u32 activity_id;
u32 status;
u32 reserved;
}; /* HW DATA */
struct gdma_general_req {
struct gdma_req_hdr hdr;
}; /* HW DATA */
#define GDMA_MESSAGE_V1 1
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
#define GDMA_STANDARD_HEADER_TYPE 0
/* Fill in the request header and the expected-response header of @hdr.
 *
 * Both sub-headers receive the standard header type, message type @code and
 * message version GDMA_MESSAGE_V1. They differ only in msg_size: @req_size
 * for the request and @resp_size for the expected response. No other field
 * of @hdr is written.
 */
static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code,
					u32 req_size, u32 resp_size)
{
	struct gdma_msg_hdr *request = &hdr->req;
	struct gdma_msg_hdr *expected = &hdr->resp;

	request->hdr_type = GDMA_STANDARD_HEADER_TYPE;
	expected->hdr_type = GDMA_STANDARD_HEADER_TYPE;

	request->msg_type = code;
	expected->msg_type = code;

	request->msg_version = GDMA_MESSAGE_V1;
	expected->msg_version = GDMA_MESSAGE_V1;

	request->msg_size = req_size;
	expected->msg_size = resp_size;
}
/* The 16-byte struct is part of the GDMA work queue entry (WQE). */
struct gdma_sge {
u64 address;
u32 mem_key;
u32 size;
}; /* HW DATA */
/* Driver-side description of one work request, as passed to
 * mana_gd_post_work_request() and mana_gd_post_and_ring().
 */
struct gdma_wqe_request {
/* Scatter-gather list and the number of entries in it. */
struct gdma_sge *sgl;
u32 num_sge;
/* Size in bytes and location of the inline out-of-band (OOB) data. */
u32 inline_oob_size;
const void *inline_oob_data;
/* Presumably a mask of enum gdma_work_request_flags -- TODO confirm. */
u32 flags;
u32 client_data_unit;
};
enum gdma_page_type {
GDMA_PAGE_TYPE_4K,
};
#define GDMA_INVALID_DMA_REGION 0
/* Describes one DMA buffer. A pointer to this structure is what
 * mana_gd_alloc_memory() and mana_gd_free_memory() operate on.
 */
struct gdma_mem_info {
struct device *dev;
/* Bus address and kernel virtual address of the same buffer. */
dma_addr_t dma_handle;
void *virt_addr;
u64 length;
/* Allocated by the PF driver */
u64 gdma_region;
};
#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
/* One GDMA device (struct gdma_context embeds one for the HWC and one for
 * MANA).
 */
struct gdma_dev {
/* Back-pointer to the owning GDMA context. */
struct gdma_context *gdma_context;
/* Device type and instance; see mana_gd_is_mana() / mana_gd_is_hwc(). */
struct gdma_dev_id dev_id;
u32 pdid;
u32 doorbell;
u32 gpa_mkey;
/* GDMA driver specific pointer */
void *driver_data;
};
#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE
#define GDMA_CQE_SIZE 64
#define GDMA_EQE_SIZE 16
#define GDMA_MAX_SQE_SIZE 512
#define GDMA_MAX_RQE_SIZE 256
#define GDMA_COMP_DATA_SIZE 0x3C
#define GDMA_EVENT_DATA_SIZE 0xC
/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */
#define GDMA_WQE_BU_SIZE 32
#define INVALID_PDID UINT_MAX
#define INVALID_DOORBELL UINT_MAX
#define INVALID_MEM_KEY UINT_MAX
#define INVALID_QUEUE_ID UINT_MAX
#define INVALID_PCI_MSIX_INDEX UINT_MAX
/* Driver-side form of one completion, as filled in by mana_gd_poll_cq().
 * The fields have the same names as those of the hardware struct gdma_cqe.
 */
struct gdma_comp {
/* GDMA_COMP_DATA_SIZE bytes of completion payload, as 32-bit words. */
u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
/* ID of the work queue the completion belongs to. */
u32 wq_num;
/* True for a send-queue completion, false for a receive-queue one. */
bool is_sq;
};
/* Driver-side form of one event-queue entry, as handed to a
 * gdma_eq_callback.
 */
struct gdma_event {
/* GDMA_EVENT_DATA_SIZE bytes of event payload, as 32-bit words. */
u32 details[GDMA_EVENT_DATA_SIZE / 4];
/* Event type; presumably one of enum gdma_eqe_type -- TODO confirm. */
u8 type;
};
struct gdma_queue;
#define CQE_POLLING_BUFFER 512
struct mana_eq {
struct gdma_queue *eq;
struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
};
typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
struct gdma_event *e);
typedef void gdma_cq_callback(void *context, struct gdma_queue *q);
/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE
* (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the
* driver increases the 'head' in BUs rather than in bytes, and notifies
* the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track
* the HW head, and increases the 'head' by 1 for every processed EQE/CQE.
*
* The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is
* processed, the driver increases the 'tail' to indicate that WQEs have
* been consumed by the HW, so the driver can post new WQEs into the SQ/RQ.
*
* The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures
* that the EQ/CQ is big enough so they can't overflow, and the driver uses
* the owner bits mechanism to detect if the queue has become empty.
*/
/* A GDMA queue of any type (SQ, RQ, CQ or EQ). The meaning of 'head' and
 * 'tail' for each queue type is described in the comment preceding this
 * structure.
 */
struct gdma_queue {
/* The GDMA device this queue belongs to. */
struct gdma_dev *gdma_dev;
enum gdma_queue_type type;
u32 id;
/* DMA memory backing the queue. */
struct gdma_mem_info mem_info;
void *queue_mem_ptr;
u32 queue_size;
bool monitor_avl_buf;
u32 head;
u32 tail;
/* Extra fields specific to EQ/CQ. */
union {
struct {
bool disable_needed;
/* Invoked with 'context' for each event on this EQ. */
gdma_eq_callback *callback;
void *context;
unsigned int msix_index;
u32 log2_throttle_limit;
/* NAPI data */
struct napi_struct napi;
int work_done;
int budget;
} eq;
struct {
/* Invoked with 'context' when this CQ has completions. */
gdma_cq_callback *callback;
void *context;
struct gdma_queue *parent; /* For CQ/EQ relationship */
} cq;
};
};
/* Creation parameters for a GDMA queue, consumed by
 * mana_gd_create_hwc_queue(), mana_gd_create_mana_eq() and
 * mana_gd_create_mana_wq_cq().
 */
struct gdma_queue_spec {
enum gdma_queue_type type;
bool monitor_avl_buf;
/* Queue size; callers in this driver pass a size in bytes. */
unsigned int queue_size;
/* Extra fields specific to EQ/CQ. */
union {
struct {
/* Event callback and the context pointer passed to it. */
gdma_eq_callback *callback;
void *context;
unsigned long log2_throttle_limit;
/* Only used by the MANA device. */
struct net_device *ndev;
} eq;
struct {
/* Completion callback and the context pointer passed to it. */
gdma_cq_callback *callback;
void *context;
/* The EQ that this CQ is attached to. */
struct gdma_queue *parent_eq;
} cq;
};
};
struct gdma_irq_context {
void (*handler)(void *arg);
void *arg;
};
/* Top-level state of the GDMA layer; embeds the HWC and MANA GDMA
 * devices.
 */
struct gdma_context {
struct device *dev;
/* Per-vPort max number of queues */
unsigned int max_num_queues;
unsigned int max_num_msix;
unsigned int num_msix_usable;
struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
/* This maps a CQ index to the queue structure. */
unsigned int max_num_cqs;
struct gdma_queue **cq_table;
/* Protect eq_test_event and test_event_eq_id */
struct mutex eq_test_event_mutex;
struct completion eq_test_event;
u32 test_event_eq_id;
void __iomem *bar0_va;
void __iomem *shm_base;
void __iomem *db_page_base;
u32 db_page_size;
/* Shared memory channel (used to bootstrap HWC) */
struct shm_channel shm_channel;
/* Hardware communication channel (HWC) */
struct gdma_dev hwc;
/* Azure network adapter */
struct gdma_dev mana;
};
#define MAX_NUM_GDMA_DEVICES 4
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
{
return gd->dev_id.type == GDMA_DEVICE_MANA;
}
static inline bool mana_gd_is_hwc(struct gdma_dev *gd)
{
return gd->dev_id.type == GDMA_DEVICE_HWC;
}
u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset);
u32 mana_gd_wq_avail_space(struct gdma_queue *wq);
int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq);
int mana_gd_create_hwc_queue(struct gdma_dev *gd,
const struct gdma_queue_spec *spec,
struct gdma_queue **queue_ptr);
int mana_gd_create_mana_eq(struct gdma_dev *gd,
const struct gdma_queue_spec *spec,
struct gdma_queue **queue_ptr);
int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
const struct gdma_queue_spec *spec,
struct gdma_queue **queue_ptr);
void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
void mana_gd_arm_cq(struct gdma_queue *cq);
struct gdma_wqe {
u32 reserved :24;
u32 last_vbytes :8;
union {
u32 flags;
struct {
u32 num_sge :8;
u32 inline_oob_size_div4:3;
u32 client_oob_in_sgl :1;
u32 reserved1 :4;
u32 client_data_unit :14;
u32 reserved2 :2;
};
};
}; /* HW DATA */
#define INLINE_OOB_SMALL_SIZE 8
#define INLINE_OOB_LARGE_SIZE 24
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256
struct gdma_cqe {
u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
union {
u32 as_uint32;
struct {
u32 wq_num : 24;
u32 is_sq : 1;
u32 reserved : 4;
u32 owner_bits : 3;
};
} cqe_info;
}; /* HW DATA */
#define GDMA_CQE_OWNER_BITS 3
#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1)
#define SET_ARM_BIT 1
#define GDMA_EQE_OWNER_BITS 3
union gdma_eqe_info {
u32 as_uint32;
struct {
u32 type : 8;
u32 reserved1 : 8;
u32 client_id : 2;
u32 reserved2 : 11;
u32 owner_bits : 3;
};
}; /* HW DATA */
#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1)
#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries))
struct gdma_eqe {
u32 details[GDMA_EVENT_DATA_SIZE / 4];
u32 eqe_info;
}; /* HW DATA */
#define GDMA_REG_DB_PAGE_OFFSET 8
#define GDMA_REG_DB_PAGE_SIZE 0x10
#define GDMA_REG_SHM_OFFSET 0x18
struct gdma_posted_wqe_info {
u32 wqe_size_in_bu;
};
/* GDMA_GENERATE_TEST_EQE */
struct gdma_generate_test_event_req {
struct gdma_req_hdr hdr;
u32 queue_index;
}; /* HW DATA */
/* GDMA_VERIFY_VF_DRIVER_VERSION */
enum {
GDMA_PROTOCOL_V1 = 1,
GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1,
GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1,
};
struct gdma_verify_ver_req {
struct gdma_req_hdr hdr;
/* Mandatory fields required for protocol establishment */
u64 protocol_ver_min;
u64 protocol_ver_max;
u64 drv_cap_flags1;
u64 drv_cap_flags2;
u64 drv_cap_flags3;
u64 drv_cap_flags4;
/* Advisory fields */
u64 drv_ver;
u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */
u32 reserved;
u32 os_ver_major;
u32 os_ver_minor;
u32 os_ver_build;
u32 os_ver_platform;
u64 reserved_2;
u8 os_ver_str1[128];
u8 os_ver_str2[128];
u8 os_ver_str3[128];
u8 os_ver_str4[128];
}; /* HW DATA */
struct gdma_verify_ver_resp {
struct gdma_resp_hdr hdr;
u64 gdma_protocol_ver;
u64 pf_cap_flags1;
u64 pf_cap_flags2;
u64 pf_cap_flags3;
u64 pf_cap_flags4;
}; /* HW DATA */
/* GDMA_QUERY_MAX_RESOURCES */
struct gdma_query_max_resources_resp {
struct gdma_resp_hdr hdr;
u32 status;
u32 max_sq;
u32 max_rq;
u32 max_cq;
u32 max_eq;
u32 max_db;
u32 max_mst;
u32 max_cq_mod_ctx;
u32 max_mod_cq;
u32 max_msix;
}; /* HW DATA */
/* GDMA_LIST_DEVICES */
struct gdma_list_devices_resp {
struct gdma_resp_hdr hdr;
u32 num_of_devs;
u32 reserved;
struct gdma_dev_id devs[64];
}; /* HW DATA */
/* GDMA_REGISTER_DEVICE */
struct gdma_register_device_resp {
struct gdma_resp_hdr hdr;
u32 pdid;
u32 gpa_mkey;
u32 db_id;
}; /* HW DATA */
/* GDMA_CREATE_QUEUE */
struct gdma_create_queue_req {
struct gdma_req_hdr hdr;
u32 type;
u32 reserved1;
u32 pdid;
u32 doolbell_id;
u64 gdma_region;
u32 reserved2;
u32 queue_size;
u32 log2_throttle_limit;
u32 eq_pci_msix_index;
u32 cq_mod_ctx_id;
u32 cq_parent_eq_id;
u8 rq_drop_on_overrun;
u8 rq_err_on_wqe_overflow;
u8 rq_chain_rec_wqes;
u8 sq_hw_db;
u32 reserved3;
}; /* HW DATA */
struct gdma_create_queue_resp {
struct gdma_resp_hdr hdr;
u32 queue_index;
}; /* HW DATA */
/* GDMA_DISABLE_QUEUE */
struct gdma_disable_queue_req {
struct gdma_req_hdr hdr;
u32 type;
u32 queue_index;
u32 alloc_res_id_on_creation;
}; /* HW DATA */
/* GDMA_CREATE_DMA_REGION */
struct gdma_create_dma_region_req {
struct gdma_req_hdr hdr;
/* The total size of the DMA region */
u64 length;
/* The offset in the first page */
u32 offset_in_page;
/* enum gdma_page_type */
u32 gdma_page_type;
/* The total number of pages */
u32 page_count;
/* If page_addr_list_len is smaller than page_count,
* the remaining page addresses will be added via the
* message GDMA_DMA_REGION_ADD_PAGES.
*/
u32 page_addr_list_len;
u64 page_addr_list[];
}; /* HW DATA */
struct gdma_create_dma_region_resp {
struct gdma_resp_hdr hdr;
u64 gdma_region;
}; /* HW DATA */
/* GDMA_DMA_REGION_ADD_PAGES */
struct gdma_dma_region_add_pages_req {
struct gdma_req_hdr hdr;
u64 gdma_region;
u32 page_addr_list_len;
u32 reserved3;
u64 page_addr_list[];
}; /* HW DATA */
/* GDMA_DESTROY_DMA_REGION */
struct gdma_destroy_dma_region_req {
struct gdma_req_hdr hdr;
u64 gdma_region;
}; /* HW DATA */
int mana_gd_verify_vf_version(struct pci_dev *pdev);
int mana_gd_register_device(struct gdma_dev *gd);
int mana_gd_deregister_device(struct gdma_dev *gd);
int mana_gd_post_work_request(struct gdma_queue *wq,
const struct gdma_wqe_request *wqe_req,
struct gdma_posted_wqe_info *wqe_info);
int mana_gd_post_and_ring(struct gdma_queue *queue,
const struct gdma_wqe_request *wqe,
struct gdma_posted_wqe_info *wqe_info);
int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r);
void mana_gd_free_res_map(struct gdma_resource *r);
void mana_gd_wq_ring_doorbell(struct gdma_context *gc,
struct gdma_queue *queue);
int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
struct gdma_mem_info *gmi);
void mana_gd_free_memory(struct gdma_mem_info *gmi);
int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
u32 resp_len, void *resp);
#endif /* _GDMA_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,843 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */
#include "gdma.h"
#include "hw_channel.h"
/* Reserve an in-flight message slot and return its index through @msg_id.
 *
 * hwc->sema is taken with down() first; the lowest clear bit of the
 * in-flight bitmap is then claimed under the bitmap's spinlock.
 * Always returns 0.
 */
static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
{
	struct gdma_resource *res = &hwc->inflight_msg_res;
	unsigned long irq_flags;
	u32 slot;

	down(&hwc->sema);

	spin_lock_irqsave(&res->lock, irq_flags);
	slot = find_first_zero_bit(res->map, res->size);
	bitmap_set(res->map, slot, 1);
	spin_unlock_irqrestore(&res->lock, irq_flags);

	*msg_id = slot;

	return 0;
}
/* Release the in-flight message slot @msg_id: clear its bit in the bitmap
 * under the bitmap's spinlock, then up() hwc->sema.
 */
static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id)
{
	struct gdma_resource *res = &hwc->inflight_msg_res;
	unsigned long irq_flags;

	spin_lock_irqsave(&res->lock, irq_flags);
	bitmap_clear(res->map, msg_id, 1);
	spin_unlock_irqrestore(&res->lock, irq_flags);

	up(&hwc->sema);
}
/* Validate the length of a received response.
 *
 * Returns -EPROTO when @resp_len is smaller than a response header or
 * larger than the caller's output buffer, and 0 otherwise.
 */
static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx,
				    const struct gdma_resp_hdr *resp_msg,
				    u32 resp_len)
{
	bool too_short = resp_len < sizeof(*resp_msg);
	bool too_long = resp_len > caller_ctx->output_buflen;

	return (too_short || too_long) ? -EPROTO : 0;
}
/* Deliver a received response to the caller that is waiting for it.
 *
 * The message ID in the response selects the caller context. A response
 * whose ID is not marked in-flight is logged and dropped. Otherwise the
 * length is verified; on success the status code and the whole response
 * are copied to the caller, and in either case the result is stored in
 * ctx->error before the caller's completion is signalled.
 */
static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
				 const struct gdma_resp_hdr *resp_msg)
{
	u16 msg_id = resp_msg->response.hwc_msg_id;
	struct hwc_caller_ctx *caller;
	int ret;

	if (!test_bit(msg_id, hwc->inflight_msg_res.map)) {
		dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", msg_id);
		return;
	}

	caller = &hwc->caller_ctx[msg_id];

	ret = mana_hwc_verify_resp_msg(caller, resp_msg, resp_len);
	if (!ret) {
		caller->status_code = resp_msg->status;
		memcpy(caller->output_buf, resp_msg, resp_len);
	}

	caller->error = ret;
	complete(&caller->comp_event);
}
static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
struct hwc_work_request *req)
{
struct device *dev = hwc_rxq->hwc->dev;
struct gdma_sge *sge;
int err;
sge = &req->sge;
sge->address = (u64)req->buf_sge_addr;
sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
sge->size = req->buf_len;
memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
req->wqe_req.sgl = sge;
req->wqe_req.num_sge = 1;
req->wqe_req.client_data_unit = 0;
err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
if (err)
dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
return err;
}
static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
struct gdma_event *event)
{
struct hw_channel_context *hwc = ctx;
struct gdma_dev *gd = hwc->gdma_dev;
union hwc_init_type_data type_data;
union hwc_init_eq_id_db eq_db;
u32 type, val;
switch (event->type) {
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
eq_db.as_uint32 = event->details[0];
hwc->cq->gdma_eq->id = eq_db.eq_id;
gd->doorbell = eq_db.doorbell;
break;
case GDMA_EQE_HWC_INIT_DATA:
type_data.as_uint32 = event->details[0];
type = type_data.type;
val = type_data.value;
switch (type) {
case HWC_INIT_DATA_CQID:
hwc->cq->gdma_cq->id = val;
break;
case HWC_INIT_DATA_RQID:
hwc->rxq->gdma_wq->id = val;
break;
case HWC_INIT_DATA_SQID:
hwc->txq->gdma_wq->id = val;
break;
case HWC_INIT_DATA_QUEUE_DEPTH:
hwc->hwc_init_q_depth_max = (u16)val;
break;
case HWC_INIT_DATA_MAX_REQUEST:
hwc->hwc_init_max_req_msg_size = val;
break;
case HWC_INIT_DATA_MAX_RESPONSE:
hwc->hwc_init_max_resp_msg_size = val;
break;
case HWC_INIT_DATA_MAX_NUM_CQS:
gd->gdma_context->max_num_cqs = val;
break;
case HWC_INIT_DATA_PDID:
hwc->gdma_dev->pdid = val;
break;
case HWC_INIT_DATA_GPA_MKEY:
hwc->rxq->msg_buf->gpa_mkey = val;
hwc->txq->msg_buf->gpa_mkey = val;
break;
}
break;
case GDMA_EQE_HWC_INIT_DONE:
complete(&hwc->hwc_init_eqe_comp);
break;
default:
/* Ignore unknown events, which should never happen. */
break;
}
}
/* Handle one receive completion on the HWC receive queue.
 *
 * The completed WQE is located from rx_oob->wqe_offset, and the address in
 * its SGE is mapped back to the index of the RX work request that owns the
 * buffer. The response in that buffer is passed to mana_hwc_handle_resp(),
 * after which the buffer is posted to the RQ again.
 *
 * A completion for a different RQ id triggers a one-time warning and is
 * ignored. A response whose msg_id is out of range is logged and dropped;
 * in that case the buffer is not re-posted.
 */
static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
				      const struct hwc_rx_oob *rx_oob)
{
	struct hw_channel_context *hwc = ctx;
	struct hwc_wq *rxq = hwc->rxq;
	struct gdma_queue *gdma_rq = rxq->gdma_wq;
	struct hwc_work_request *owner;
	struct gdma_resp_hdr *resp;
	struct gdma_wqe *wqe_hdr;
	struct gdma_sge *rx_sge;
	u64 buf_base;
	u64 owner_idx;
	u8 *wqe_ptr;

	if (WARN_ON_ONCE(gdma_rq->id != gdma_rxq_id))
		return;

	wqe_ptr = mana_gd_get_wqe_ptr(gdma_rq,
				      rx_oob->wqe_offset / GDMA_WQE_BU_SIZE);
	wqe_hdr = (struct gdma_wqe *)wqe_ptr;

	/* The SGE follows the 8-byte WQE header and the inline OOB data. */
	rx_sge = (struct gdma_sge *)(wqe_ptr + 8 +
				     wqe_hdr->inline_oob_size_div4 * 4);

	/* Select the RX work request for virtual address and for reposting. */
	buf_base = rxq->msg_buf->mem_info.dma_handle;
	owner_idx = (rx_sge->address - buf_base) / hwc->max_req_msg_size;
	owner = &rxq->msg_buf->reqs[owner_idx];

	resp = (struct gdma_resp_hdr *)owner->buf_va;
	if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) {
		dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n",
			resp->response.hwc_msg_id);
		return;
	}

	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp);

	/* 'resp' must not be used from here on: the buffer it points to is
	 * handed back to the HW by the call below.
	 */
	mana_hwc_post_rx_wqe(rxq, owner);
}
/* Handle one send completion on the HWC send queue.
 *
 * Nothing is processed here; the handler only warns (once) if there is no
 * TX queue or if the completion names a different send queue id.
 */
static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id,
				      const struct hwc_rx_oob *rx_oob)
{
	struct hw_channel_context *hwc = ctx;
	struct hwc_wq *txq = hwc->txq;

	WARN_ON_ONCE(!txq || txq->gdma_wq->id != gdma_txq_id);
}
/* Create the GDMA send or receive queue used by the HWC.
 *
 * @type must be GDMA_SQ or GDMA_RQ, otherwise -EINVAL is returned.
 * On success the new queue is returned through @queue; the return value is
 * that of mana_gd_create_hwc_queue().
 */
static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc,
				   enum gdma_queue_type type, u64 queue_size,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec wq_spec = {
		.type = type,
		.monitor_avl_buf = false,
		.queue_size = queue_size,
	};

	switch (type) {
	case GDMA_SQ:
	case GDMA_RQ:
		return mana_gd_create_hwc_queue(hwc->gdma_dev, &wq_spec,
						queue);
	default:
		return -EINVAL;
	}
}
/* Create the GDMA completion queue used by the HWC.
 *
 * @cb is registered as the CQ callback and is passed @ctx; the CQ is
 * attached to @parent_eq. The new queue is returned through @queue; the
 * return value is that of mana_gd_create_hwc_queue().
 */
static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc,
				   u64 queue_size,
				   void *ctx, gdma_cq_callback *cb,
				   struct gdma_queue *parent_eq,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec cq_spec = {};

	cq_spec.type = GDMA_CQ;
	cq_spec.queue_size = queue_size;
	cq_spec.monitor_avl_buf = false;

	cq_spec.cq.parent_eq = parent_eq;
	cq_spec.cq.callback = cb;
	cq_spec.cq.context = ctx;

	return mana_gd_create_hwc_queue(hwc->gdma_dev, &cq_spec, queue);
}
/* Create the GDMA event queue used by the HWC.
 *
 * @cb is registered as the EQ callback and is passed @ctx; the throttle
 * limit is DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ. The new queue is returned
 * through @queue; the return value is that of mana_gd_create_hwc_queue().
 */
static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc,
				   u64 queue_size,
				   void *ctx, gdma_eq_callback *cb,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec eq_spec = {};

	eq_spec.type = GDMA_EQ;
	eq_spec.queue_size = queue_size;
	eq_spec.monitor_avl_buf = false;

	eq_spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ;
	eq_spec.eq.callback = cb;
	eq_spec.eq.context = ctx;

	return mana_gd_create_hwc_queue(hwc->gdma_dev, &eq_spec, queue);
}
/* CQ callback for the HWC completion queue.
 *
 * Polls up to hwc_cq->queue_depth completions from @q_self into
 * hwc_cq->comp_buf, dispatches each one to the TX or RX event handler
 * according to its is_sq flag, and finally re-arms the CQ.
 *
 * @ctx:    the struct hwc_cq registered together with this callback
 * @q_self: the GDMA CQ that raised the callback
 */
static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
{
	struct hwc_rx_oob comp_data = {};
	struct gdma_comp *completions;
	struct hwc_cq *hwc_cq = ctx;
	/* mana_gd_poll_cq() is declared to return int. The count is kept
	 * signed so that a negative return is caught by the check below and
	 * makes the loop run zero times, instead of being converted to a
	 * huge unsigned count that would walk far past comp_buf.
	 */
	int comp_read, i;

	WARN_ON_ONCE(hwc_cq->gdma_cq != q_self);

	completions = hwc_cq->comp_buf;
	comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth);
	WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth);

	for (i = 0; i < comp_read; ++i) {
		/* The completion payload carries the HWC RX OOB data. */
		comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data;

		if (completions[i].is_sq)
			hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx,
						 completions[i].wq_num,
						 &comp_data);
		else
			hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx,
						 completions[i].wq_num,
						 &comp_data);
	}

	mana_gd_arm_cq(q_self);
}
/* Tear down an HWC completion queue object.
 *
 * Frees the completion buffer, destroys the GDMA CQ and then the GDMA EQ
 * when present, and finally frees @hwc_cq itself. A NULL @hwc_cq is a
 * no-op, so this can be used on a partially constructed object.
 */
static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
{
	struct gdma_queue *gdma_cq, *gdma_eq;

	if (!hwc_cq)
		return;

	kfree(hwc_cq->comp_buf);

	gdma_cq = hwc_cq->gdma_cq;
	if (gdma_cq)
		mana_gd_destroy_queue(gc, gdma_cq);

	gdma_eq = hwc_cq->gdma_eq;
	if (gdma_eq)
		mana_gd_destroy_queue(gc, gdma_eq);

	kfree(hwc_cq);
}
/* Create the HWC completion queue object: a GDMA EQ, a GDMA CQ attached to
 * it, and a buffer for @q_depth polled completions.
 *
 * @callback/@ctx are the EQ callback and its context. The CQ callback is
 * always mana_hwc_comp_event(), which forwards completions to @rx_ev_hdlr
 * or @tx_ev_hdlr with their respective contexts.
 *
 * Queue sizes are q_depth entries rounded up to a power of two, and never
 * smaller than MINIMUM_SUPPORTED_PAGE_SIZE.
 *
 * Returns 0 and stores the new object in @hwc_cq_ptr, or a negative errno
 * after releasing whatever had been created.
 */
static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth,
			      gdma_eq_callback *callback, void *ctx,
			      hwc_rx_event_handler_t *rx_ev_hdlr,
			      void *rx_ev_ctx,
			      hwc_tx_event_handler_t *tx_ev_hdlr,
			      void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr)
{
	struct gdma_queue *new_eq, *new_cq;
	struct gdma_comp *completions;
	struct hwc_cq *obj;
	u32 eq_bytes, cq_bytes;
	int ret;

	eq_bytes = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth);
	if (eq_bytes < MINIMUM_SUPPORTED_PAGE_SIZE)
		eq_bytes = MINIMUM_SUPPORTED_PAGE_SIZE;

	cq_bytes = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth);
	if (cq_bytes < MINIMUM_SUPPORTED_PAGE_SIZE)
		cq_bytes = MINIMUM_SUPPORTED_PAGE_SIZE;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	ret = mana_hwc_create_gdma_eq(hwc, eq_bytes, ctx, callback, &new_eq);
	if (ret) {
		dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", ret);
		goto destroy;
	}
	obj->gdma_eq = new_eq;

	ret = mana_hwc_create_gdma_cq(hwc, cq_bytes, obj, mana_hwc_comp_event,
				      new_eq, &new_cq);
	if (ret) {
		dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", ret);
		goto destroy;
	}
	obj->gdma_cq = new_cq;

	completions = kcalloc(q_depth, sizeof(struct gdma_comp), GFP_KERNEL);
	if (!completions) {
		ret = -ENOMEM;
		goto destroy;
	}

	obj->hwc = hwc;
	obj->comp_buf = completions;
	obj->queue_depth = q_depth;
	obj->rx_event_handler = rx_ev_hdlr;
	obj->rx_event_ctx = rx_ev_ctx;
	obj->tx_event_handler = tx_ev_hdlr;
	obj->tx_event_ctx = tx_ev_ctx;

	*hwc_cq_ptr = obj;
	return 0;

destroy:
	/* Copes with a partially built object (NULL queues / buffer). */
	mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, obj);
	return ret;
}
/* Allocate the message buffer of an HWC work queue.
 *
 * One bookkeeping structure with @q_depth work requests is allocated,
 * followed by a single DMA area of q_depth * max_msg_size bytes (rounded
 * up to a page multiple). The area is cut into @q_depth slots of
 * @max_msg_size bytes, and each work request records the virtual address,
 * the address to use in an SGE, and the length of its slot.
 *
 * Returns 0 and stores the result in @dma_buf_ptr, or a negative errno.
 */
static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth,
				  u32 max_msg_size,
				  struct hwc_dma_buf **dma_buf_ptr)
{
	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
	struct hwc_work_request *slot;
	struct hwc_dma_buf *buf;
	void *va_base;
	u8 *sge_base;
	u32 area_size;
	u32 offset;
	int ret;
	u16 n;

	buf = kzalloc(sizeof(*buf) +
		      q_depth * sizeof(struct hwc_work_request),
		      GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	buf->num_reqs = q_depth;

	area_size = PAGE_ALIGN(q_depth * max_msg_size);

	ret = mana_gd_alloc_memory(gc, area_size, &buf->mem_info);
	if (ret) {
		dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", ret);
		kfree(buf);
		return ret;
	}

	va_base = buf->mem_info.virt_addr;
	sge_base = (u8 *)buf->mem_info.dma_handle;

	for (n = 0; n < q_depth; n++) {
		offset = n * max_msg_size;
		slot = &buf->reqs[n];

		slot->buf_va = va_base + offset;
		slot->buf_sge_addr = sge_base + offset;
		slot->buf_len = max_msg_size;
	}

	*dma_buf_ptr = buf;
	return 0;
}
/* Release a message buffer obtained from mana_hwc_alloc_dma_buf(): the DMA
 * memory first, then the bookkeeping structure. NULL is accepted and
 * ignored.
 */
static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc,
				     struct hwc_dma_buf *dma_buf)
{
	if (dma_buf) {
		mana_gd_free_memory(&dma_buf->mem_info);
		kfree(dma_buf);
	}
}
/* Tear down an HWC work queue object.
 *
 * Releases the message buffer, destroys the GDMA queue when one is
 * attached, and frees @hwc_wq. A NULL @hwc_wq is a no-op, so this can be
 * used on a partially constructed object.
 */
static void mana_hwc_destroy_wq(struct hw_channel_context *hwc,
				struct hwc_wq *hwc_wq)
{
	struct gdma_queue *gdma_wq;

	if (!hwc_wq)
		return;

	mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf);

	gdma_wq = hwc_wq->gdma_wq;
	if (gdma_wq)
		mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, gdma_wq);

	kfree(hwc_wq);
}
/* Create an HWC work queue object (send or receive): a GDMA queue plus a
 * message buffer with @q_depth slots of @max_msg_size bytes.
 *
 * @q_type: GDMA_SQ or GDMA_RQ (anything else triggers a warning and is
 *          then rejected by mana_hwc_create_gdma_wq())
 * @hwc_cq: the completion queue object this work queue reports to
 *
 * The queue size is q_depth maximum-sized WQEs rounded up to a power of
 * two, and never smaller than MINIMUM_SUPPORTED_PAGE_SIZE.
 *
 * Returns 0 and stores the new object in @hwc_wq_ptr, or a negative errno
 * after releasing everything that had been created.
 */
static int mana_hwc_create_wq(struct hw_channel_context *hwc,
			      enum gdma_queue_type q_type, u16 q_depth,
			      u32 max_msg_size, struct hwc_cq *hwc_cq,
			      struct hwc_wq **hwc_wq_ptr)
{
	struct gdma_queue *queue;
	struct hwc_wq *hwc_wq;
	u32 queue_size;
	int err;

	WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ);

	if (q_type == GDMA_RQ)
		queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth);
	else
		queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth);

	if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE)
		queue_size = MINIMUM_SUPPORTED_PAGE_SIZE;

	hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL);
	if (!hwc_wq)
		return -ENOMEM;

	err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue);
	if (err)
		goto out;

	/* Attach the queue to hwc_wq before anything else can fail, so that
	 * mana_hwc_destroy_wq() on the error path below destroys it instead
	 * of leaking it.
	 */
	hwc_wq->hwc = hwc;
	hwc_wq->gdma_wq = queue;
	hwc_wq->queue_depth = q_depth;
	hwc_wq->hwc_cq = hwc_cq;

	err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size,
				     &hwc_wq->msg_buf);
	if (err)
		goto out;

	*hwc_wq_ptr = hwc_wq;
	return 0;
out:
	mana_hwc_destroy_wq(hwc, hwc_wq);
	return err;
}
/* Post the message held in @req to the HWC send queue.
 *
 * The message size must be non-zero and must fit in the request's buffer,
 * otherwise -EINVAL is returned. The TX OOB is filled with the destination
 * RQ/CQ ids, the ids of this SQ and its CQ, and the @dest_pf flag; it is
 * sent as inline OOB data together with a single SGE covering msg_size
 * bytes of the buffer.
 *
 * Returns 0 or the error from mana_gd_post_and_ring(), which is also
 * logged.
 */
static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq,
				struct hwc_work_request *req,
				u32 dest_virt_rq_id, u32 dest_virt_rcq_id,
				bool dest_pf)
{
	struct gdma_wqe_request *wqe_req = &req->wqe_req;
	struct hwc_tx_oob *oob = &req->tx_oob;
	struct device *dev = hwc_txq->hwc->dev;
	struct gdma_sge *tx_sge = &req->sge;
	int ret;

	if (req->msg_size == 0 || req->msg_size > req->buf_len) {
		dev_err(dev, "wrong msg_size: %u, buf_len: %u\n",
			req->msg_size, req->buf_len);
		return -EINVAL;
	}

	oob->vrq_id = dest_virt_rq_id;
	oob->dest_vfid = 0;
	oob->vrcq_id = dest_virt_rcq_id;
	oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id;
	oob->loopback = false;
	oob->lso_override = false;
	oob->dest_pf = dest_pf;
	oob->vsq_id = hwc_txq->gdma_wq->id;

	tx_sge->address = (u64)req->buf_sge_addr;
	tx_sge->mem_key = hwc_txq->msg_buf->gpa_mkey;
	tx_sge->size = req->msg_size;

	memset(wqe_req, 0, sizeof(*wqe_req));
	wqe_req->sgl = tx_sge;
	wqe_req->num_sge = 1;
	wqe_req->inline_oob_size = sizeof(struct hwc_tx_oob);
	wqe_req->inline_oob_data = oob;
	wqe_req->client_data_unit = 0;

	ret = mana_gd_post_and_ring(hwc_txq->gdma_wq, wqe_req, NULL);
	if (ret)
		dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", ret);

	return ret;
}
/* Set up the bookkeeping for in-flight messages: initialise hwc->sema to
 * @num_msg and allocate a resource bitmap of @num_msg bits.
 *
 * Returns 0 or the error from mana_gd_alloc_res_map(), which is also
 * logged.
 */
static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc,
				      u16 num_msg)
{
	int ret;

	sema_init(&hwc->sema, num_msg);

	ret = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res);
	if (!ret)
		return 0;

	dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", ret);
	return ret;
}
/* Prepare the HWC for use and test its EQ.
 *
 * Posts the first @q_depth receive buffers to the RQ, allocates @q_depth
 * caller contexts (with their completions initialised) and stores them in
 * hwc->caller_ctx, then returns the result of mana_gd_test_eq() on the HWC
 * EQ. Fails early with the posting error or -ENOMEM.
 *
 * @max_req_msg_size and @max_resp_msg_size are not used here.
 */
static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth,
				 u32 max_req_msg_size, u32 max_resp_msg_size)
{
	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
	struct hwc_wq *rxq = hwc->rxq;
	struct hwc_caller_ctx *callers;
	int ret;
	int n;

	/* Post all WQEs on the RQ */
	for (n = 0; n < q_depth; n++) {
		ret = mana_hwc_post_rx_wqe(rxq, &rxq->msg_buf->reqs[n]);
		if (ret)
			return ret;
	}

	callers = kzalloc(q_depth * sizeof(struct hwc_caller_ctx), GFP_KERNEL);
	if (!callers)
		return -ENOMEM;

	for (n = 0; n < q_depth; n++)
		init_completion(&callers[n].comp_event);

	hwc->caller_ctx = callers;

	return mana_gd_test_eq(gc, hwc->cq->gdma_eq);
}
/* Bring up the HWC through the shared memory channel.
 *
 * Passes the DMA addresses of the HWC EQ, CQ, RQ and SQ and the EQ's MSI-X
 * index to mana_smc_setup_hwc(), then waits up to 60 seconds for
 * hwc_init_eqe_comp, which mana_hwc_init_event_handler() signals on
 * GDMA_EQE_HWC_INIT_DONE. The values recorded by that handler are then
 * returned through @q_depth, @max_req_msg_size and @max_resp_msg_size, and
 * the CQ table is allocated with the HWC CQ registered in it.
 *
 * Returns 0, the error from mana_smc_setup_hwc(), -ETIMEDOUT, -EPROTO if
 * the CQ id is not below max_num_cqs, or -ENOMEM.
 */
static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth,
				      u32 *max_req_msg_size,
				      u32 *max_resp_msg_size)
{
	struct hw_channel_context *hwc = gc->hwc.driver_data;
	struct gdma_queue *gdma_rq = hwc->rxq->gdma_wq;
	struct gdma_queue *gdma_sq = hwc->txq->gdma_wq;
	struct gdma_queue *gdma_eq = hwc->cq->gdma_eq;
	struct gdma_queue *gdma_cq = hwc->cq->gdma_cq;
	unsigned long remaining;
	int ret;

	init_completion(&hwc->hwc_init_eqe_comp);

	ret = mana_smc_setup_hwc(&gc->shm_channel, false,
				 gdma_eq->mem_info.dma_handle,
				 gdma_cq->mem_info.dma_handle,
				 gdma_rq->mem_info.dma_handle,
				 gdma_sq->mem_info.dma_handle,
				 gdma_eq->eq.msix_index);
	if (ret)
		return ret;

	remaining = wait_for_completion_timeout(&hwc->hwc_init_eqe_comp,
						60 * HZ);
	if (remaining == 0)
		return -ETIMEDOUT;

	*q_depth = hwc->hwc_init_q_depth_max;
	*max_req_msg_size = hwc->hwc_init_max_req_msg_size;
	*max_resp_msg_size = hwc->hwc_init_max_resp_msg_size;

	if (WARN_ON(gdma_cq->id >= gc->max_num_cqs))
		return -EPROTO;

	gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *));
	if (!gc->cq_table)
		return -ENOMEM;

	gc->cq_table[gdma_cq->id] = gdma_cq;

	return 0;
}
/* Create all queues of the HWC: the in-flight message bookkeeping, the
 * shared CQ (with its EQ), the RQ and the SQ.
 *
 * The RQ buffers are sized by @max_req_msg_size and the SQ buffers by
 * @max_resp_msg_size. On success hwc->cq, hwc->rxq, hwc->txq,
 * hwc->num_inflight_msg and hwc->max_req_msg_size are set and 0 is
 * returned. On failure everything created here is destroyed again and a
 * negative errno is returned.
 */
static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth,
				u32 max_req_msg_size, u32 max_resp_msg_size)
{
	struct hwc_wq *rxq = NULL;
	struct hwc_wq *txq = NULL;
	struct hwc_cq *cq = NULL;
	int ret;

	ret = mana_hwc_init_inflight_msg(hwc, q_depth);
	if (ret)
		return ret;

	/* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ
	 * queue depth and RQ queue depth.
	 */
	ret = mana_hwc_create_cq(hwc, q_depth * 2,
				 mana_hwc_init_event_handler, hwc,
				 mana_hwc_rx_event_handler, hwc,
				 mana_hwc_tx_event_handler, hwc, &cq);
	if (ret) {
		dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", ret);
		goto undo;
	}
	hwc->cq = cq;

	ret = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size,
				 cq, &rxq);
	if (ret) {
		dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", ret);
		goto undo;
	}
	hwc->rxq = rxq;

	ret = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size,
				 cq, &txq);
	if (ret) {
		dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", ret);
		goto undo;
	}
	hwc->txq = txq;

	hwc->num_inflight_msg = q_depth;
	hwc->max_req_msg_size = max_req_msg_size;

	return 0;

undo:
	/* The destroy helpers ignore NULL, so objects that were never
	 * created need no separate check here.
	 */
	mana_hwc_destroy_wq(hwc, txq);
	mana_hwc_destroy_wq(hwc, rxq);
	mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, cq);

	mana_gd_free_res_map(&hwc->inflight_msg_res);
	return ret;
}
/* Create the hardware communication channel (HWC) of a GDMA device.
 *
 * Allocates the channel context, links it with gc->hwc, creates the
 * bootstrap queues, establishes the channel with the device and tests it.
 *
 * Returns 0 on success or a negative errno. On failure nothing allocated
 * here stays behind and gc->hwc no longer refers to the channel context:
 *  - if queue creation fails, mana_hwc_init_queues() has already undone its
 *    own work, so only the context itself is released;
 *  - if a later step fails, mana_hwc_destroy_channel() releases the queues,
 *    the caller contexts, the CQ table and the context.
 */
int mana_hwc_create_channel(struct gdma_context *gc)
{
	u32 max_req_msg_size, max_resp_msg_size;
	struct gdma_dev *gd = &gc->hwc;
	struct hw_channel_context *hwc;
	u16 q_depth_max;
	int err;

	hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
	if (!hwc)
		return -ENOMEM;

	gd->gdma_context = gc;
	gd->driver_data = hwc;
	hwc->gdma_dev = gd;
	hwc->dev = gc->dev;

	/* HWC's instance number is always 0. */
	gd->dev_id.as_uint32 = 0;
	gd->dev_id.type = GDMA_DEVICE_HWC;

	gd->pdid = INVALID_PDID;
	gd->doorbell = INVALID_DOORBELL;

	err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
				   HW_CHANNEL_MAX_REQUEST_SIZE,
				   HW_CHANNEL_MAX_RESPONSE_SIZE);
	if (err) {
		dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err);
		goto free_hwc;
	}

	err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size,
					 &max_resp_msg_size);
	if (err) {
		dev_err(hwc->dev, "Failed to establish HWC: %d\n", err);
		goto destroy_hwc;
	}

	err = mana_hwc_test_channel(gc->hwc.driver_data,
				    HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
				    max_req_msg_size, max_resp_msg_size);
	if (err) {
		dev_err(hwc->dev, "Failed to test HWC: %d\n", err);
		goto destroy_hwc;
	}

	return 0;

destroy_hwc:
	/* All three queues exist at this point, so the full teardown applies. */
	mana_hwc_destroy_channel(gc);
	return err;

free_hwc:
	/* Do not leave gc->hwc pointing at the context that is freed here. */
	gd->driver_data = NULL;
	gd->gdma_context = NULL;
	kfree(hwc);
	return err;
}

/* Tear down the HWC and release everything owned by its context.
 *
 * Safe to call when no channel exists (gc->hwc.driver_data is NULL) and on a
 * partially set-up channel: queues that were never created are skipped.
 * After the call gc->hwc.driver_data, gc->hwc.gdma_context and gc->cq_table
 * are NULL, so a caller that also vfree()s gc->cq_table afterwards remains
 * correct.
 */
void mana_hwc_destroy_channel(struct gdma_context *gc)
{
	struct hw_channel_context *hwc = gc->hwc.driver_data;

	if (!hwc)
		return;

	/* mana_hwc_establish_channel() only succeeds when the HWC CQ id is
	 * below gc->max_num_cqs, so an established channel always has a
	 * non-zero value here and gets the shared-memory teardown.
	 * NOTE(review): max_num_cqs is expected to be filled in by the HWC
	 * init event handler (not visible here) and to be 0 when the device
	 * never answered - confirm. It is reset so a later create starts
	 * from a clean state.
	 */
	if (gc->max_num_cqs > 0) {
		mana_smc_teardown_hwc(&gc->shm_channel, false);
		gc->max_num_cqs = 0;
	}

	kfree(hwc->caller_ctx);
	hwc->caller_ctx = NULL;

	if (hwc->txq) {
		mana_hwc_destroy_wq(hwc, hwc->txq);
		hwc->txq = NULL;
	}

	if (hwc->rxq) {
		mana_hwc_destroy_wq(hwc, hwc->rxq);
		hwc->rxq = NULL;
	}

	if (hwc->cq) {
		mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
		hwc->cq = NULL;
	}

	mana_gd_free_res_map(&hwc->inflight_msg_res);

	hwc->num_inflight_msg = 0;

	if (hwc->gdma_dev->pdid != INVALID_PDID) {
		hwc->gdma_dev->doorbell = INVALID_DOORBELL;
		hwc->gdma_dev->pdid = INVALID_PDID;
	}

	kfree(hwc);
	gc->hwc.driver_data = NULL;
	gc->hwc.gdma_context = NULL;

	/* Allocated by mana_hwc_establish_channel(); vfree(NULL) is a no-op. */
	vfree(gc->cq_table);
	gc->cq_table = NULL;
}
/* Send one request over the HWC and wait for its completion.
 *
 * @hwc:      channel to use
 * @req_len:  number of bytes of @req to send; must not exceed the buffer
 *            length of the TX work request picked for this message
 * @req:      request message; if NULL nothing is copied and only the
 *            message id is written into the TX buffer
 * @resp_len: size of @resp, recorded in the caller context
 * @resp:     output buffer, recorded in the caller context
 *
 * A message index is taken for the duration of the call and is always
 * returned before the function exits.
 *
 * Returns 0 on success, -EINVAL if @req_len is too large, the error from
 * posting the send WQE, -ETIMEDOUT if no completion arrives within
 * 30 seconds, the error stored in the caller context, or -EPROTO if a
 * non-zero status code was recorded.
 */
int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
			  const void *req, u32 resp_len, void *resp)
{
	struct hwc_work_request *tx_wr;
	struct hwc_wq *txq = hwc->txq;
	struct gdma_req_hdr *req_msg;
	struct hwc_caller_ctx *ctx;
	u16 msg_id;
	int err;

	mana_hwc_get_msg_index(hwc, &msg_id);

	tx_wr = &txq->msg_buf->reqs[msg_id];

	if (req_len > tx_wr->buf_len) {
		/* Both values are u32, so print them as unsigned. */
		dev_err(hwc->dev, "HWC: req msg size: %u > %u\n", req_len,
			tx_wr->buf_len);
		err = -EINVAL;
		goto out;
	}

	/* The caller context is selected by the same index as the TX slot. */
	ctx = hwc->caller_ctx + msg_id;
	ctx->output_buf = resp;
	ctx->output_buflen = resp_len;

	req_msg = (struct gdma_req_hdr *)tx_wr->buf_va;
	if (req)
		memcpy(req_msg, req, req_len);

	req_msg->req.hwc_msg_id = msg_id;

	tx_wr->msg_size = req_len;

	err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false);
	if (err) {
		dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err);
		goto out;
	}

	if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) {
		dev_err(hwc->dev, "HWC: Request timed out!\n");
		err = -ETIMEDOUT;
		goto out;
	}

	if (ctx->error) {
		err = ctx->error;
		goto out;
	}

	if (ctx->status_code) {
		dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
			ctx->status_code);
		err = -EPROTO;
		goto out;
	}
out:
	mana_hwc_put_msg_index(hwc, msg_id);
	return err;
}

View File

@ -0,0 +1,190 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Copyright (c) 2021, Microsoft Corporation. */
#ifndef _HW_CHANNEL_H
#define _HW_CHANNEL_H
#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4
/* Message sizes and queue depth that mana_hwc_create_channel() passes to
 * mana_hwc_init_queues() when the channel is first set up.
 */
#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000
#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000
#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1
/* Identifiers of the individual HWC init data items.
 * NOTE(review): presumably compared against hwc_init_type_data.type by the
 * init event handler, which is not part of this header - confirm.
 */
#define HWC_INIT_DATA_CQID 1
#define HWC_INIT_DATA_RQID 2
#define HWC_INIT_DATA_SQID 3
#define HWC_INIT_DATA_QUEUE_DEPTH 4
#define HWC_INIT_DATA_MAX_REQUEST 5
#define HWC_INIT_DATA_MAX_RESPONSE 6
#define HWC_INIT_DATA_MAX_NUM_CQS 7
#define HWC_INIT_DATA_PDID 8
#define HWC_INIT_DATA_GPA_MKEY 9
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
 * them are naturally aligned and hence don't need __packed.
 */
/* One 32-bit word that carries an EQ id in its first 16-bit field and a
 * doorbell value in its second 16-bit field; as_uint32 accesses the whole
 * word at once.
 */
union hwc_init_eq_id_db {
u32 as_uint32;
struct {
u32 eq_id : 16;
u32 doorbell : 16;
};
}; /* HW DATA */
/* One 32-bit word holding a 24-bit value together with an 8-bit type tag;
 * as_uint32 accesses the whole word at once.
 * NOTE(review): the type tag is presumably one of the HWC_INIT_DATA_*
 * constants - confirm against the init event handler.
 */
union hwc_init_type_data {
u32 as_uint32;
struct {
u32 value : 24;
u32 type : 8;
};
}; /* HW DATA */
/* Out-of-band data handed to both the HWC RX and TX event handlers (see
 * hwc_rx_event_handler_t and hwc_tx_event_handler_t). The field order and
 * bit widths form the hardware layout and must not be changed.
 */
struct hwc_rx_oob {
u32 type : 6;
u32 eom : 1;
u32 som : 1;
u32 vendor_err : 8;
u32 reserved1 : 16;
u32 src_virt_wq : 24;
u32 src_vfid : 8;
u32 reserved2;
/* The same 32 bits are read either as the low half of a WQE address
 * (paired with wqe_addr_high below) or as a WQE offset.
 */
union {
u32 wqe_addr_low;
u32 wqe_offset;
};
u32 wqe_addr_high;
u32 client_data_unit : 14;
u32 reserved3 : 18;
u32 tx_oob_data_size;
u32 chunk_offset : 21;
u32 reserved4 : 11;
}; /* HW DATA */
/* Out-of-band data stored in each hwc_work_request (tx_oob member). It
 * carries the ids of the virtual RQ, its CQ, the SQ's CQ and the SQ along
 * with a few flags. The field order and bit widths form the hardware layout
 * and must not be changed.
 */
struct hwc_tx_oob {
u32 reserved1;
u32 reserved2;
u32 vrq_id : 24;
u32 dest_vfid : 8;
u32 vrcq_id : 24;
u32 reserved3 : 8;
u32 vscq_id : 24;
u32 loopback : 1;
u32 lso_override: 1;
u32 dest_pf : 1;
u32 reserved4 : 5;
u32 vsq_id : 24;
u32 reserved5 : 8;
}; /* HW DATA */
/* Software bookkeeping for one HWC message slot. */
struct hwc_work_request {
/* CPU address of the message buffer; mana_hwc_send_request() copies the
 * request to this address.
 */
void *buf_va;
void *buf_sge_addr;
/* Capacity of the buffer; requests longer than this are rejected. */
u32 buf_len;
/* Length of the message currently placed in the buffer. */
u32 msg_size;
struct gdma_wqe_request wqe_req;
struct hwc_tx_oob tx_oob;
struct gdma_sge sge;
};
/* hwc_dma_buf represents the array of in-flight WQEs.
 * mem_info, also known as the GDMA mapped memory, is partitioned and used by
 * the in-flight WQEs.
 * The number of WQEs is determined by the number of in-flight messages.
 */
struct hwc_dma_buf {
struct gdma_mem_info mem_info;
u32 gpa_mkey;
u32 num_reqs;
/* Flexible array of work requests; must remain the last member. */
struct hwc_work_request reqs[];
};
/* Callback types stored in struct hwc_cq. Each receives an opaque context
 * pointer, the id of the GDMA RX or TX queue concerned and the OOB data.
 * Note that the TX handler is also given a struct hwc_rx_oob.
 */
typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id,
const struct hwc_rx_oob *rx_oob);
typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id,
const struct hwc_rx_oob *rx_oob);
/* Completion queue of the HWC. A single instance is shared by the HWC's RQ
 * and SQ (see mana_hwc_init_queues()).
 */
struct hwc_cq {
/* Channel this CQ belongs to. */
struct hw_channel_context *hwc;
/* Underlying GDMA completion queue and the event queue behind it. */
struct gdma_queue *gdma_cq;
struct gdma_queue *gdma_eq;
struct gdma_comp *comp_buf;
u16 queue_depth;
/* RX/TX callbacks together with the context pointer handed to each. */
hwc_rx_event_handler_t *rx_event_handler;
void *rx_event_ctx;
hwc_tx_event_handler_t *tx_event_handler;
void *tx_event_ctx;
};
/* Work queue of the HWC; used for both the RQ (hwc->rxq) and the SQ
 * (hwc->txq).
 */
struct hwc_wq {
/* Channel this queue belongs to. */
struct hw_channel_context *hwc;
/* Underlying GDMA work queue. */
struct gdma_queue *gdma_wq;
/* Message slots (work requests) of this queue. */
struct hwc_dma_buf *msg_buf;
u16 queue_depth;
/* Completion queue associated with this work queue. */
struct hwc_cq *hwc_cq;
};
/* Per-message state of a request issued through mana_hwc_send_request().
 * One entry exists per message slot and is selected by the message index.
 */
struct hwc_caller_ctx {
/* The sender waits on this completion for the response. */
struct completion comp_event;
/* Response buffer supplied by the sender, and its size. */
void *output_buf;
u32 output_buflen;
u32 error; /* Linux error code */
/* A non-zero value makes mana_hwc_send_request() fail with -EPROTO. */
u32 status_code;
};
/* State of the hardware communication channel (HWC). Allocated by
 * mana_hwc_create_channel() and stored in gc->hwc.driver_data.
 */
struct hw_channel_context {
struct gdma_dev *gdma_dev;
struct device *dev;
/* Set by mana_hwc_init_queues() to the queue depth in use. */
u16 num_inflight_msg;
u32 max_req_msg_size;
/* Limits reported back by mana_hwc_establish_channel() once
 * hwc_init_eqe_comp has been completed.
 */
u16 hwc_init_q_depth_max;
u32 hwc_init_max_req_msg_size;
u32 hwc_init_max_resp_msg_size;
struct completion hwc_init_eqe_comp;
/* Receive queue, send queue and the completion queue they share. */
struct hwc_wq *rxq;
struct hwc_wq *txq;
struct hwc_cq *cq;
struct semaphore sema;
struct gdma_resource inflight_msg_res;
/* Array of per-message caller contexts, indexed by message id. */
struct hwc_caller_ctx *caller_ctx;
};
/* Create and destroy the HWC of a GDMA device. */
int mana_hwc_create_channel(struct gdma_context *gc);
void mana_hwc_destroy_channel(struct gdma_context *gc);
/* Send a request of req_len bytes over the HWC and wait for the response,
 * for which resp/resp_len describe the output buffer. Returns 0 or a
 * negative errno.
 */
int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
const void *req, u32 resp_len, void *resp);
#endif /* _HW_CHANNEL_H */

View File

@ -0,0 +1,533 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Copyright (c) 2021, Microsoft Corporation. */
#ifndef _MANA_H
#define _MANA_H
#include "gdma.h"
#include "hw_channel.h"
/* Microsoft Azure Network Adapter (MANA)'s definitions
 *
 * Structures labeled with "HW DATA" are exchanged with the hardware. All of
 * them are naturally aligned and hence don't need __packed.
 */
/* MANA protocol version */
#define MANA_MAJOR_VERSION 0
#define MANA_MINOR_VERSION 1
#define MANA_MICRO_VERSION 1
/* 64-bit handle type used for vPort, WQ object and RX object handles in
 * this header. The all-ones value marks an invalid handle.
 */
typedef u64 mana_handle_t;
#define INVALID_MANA_HANDLE ((mana_handle_t)-1)
/* Boolean with an additional "unknown" value; used for the RSS state of a
 * port and for the rx argument of mana_config_rss().
 */
enum TRI_STATE {
TRI_STATE_UNKNOWN = -1,
TRI_STATE_FALSE = 0,
TRI_STATE_TRUE = 1
};
/* The number of entries of the hardware indirection table must be a power
 * of 2, which is what makes the mask below valid.
 */
#define MANA_INDIRECT_TABLE_SIZE 64
#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
#define MANA_HASH_KEY_SIZE 40
#define COMP_ENTRY_SIZE 64
#define ADAPTER_MTU_SIZE 1500
/* MTU plus 14 bytes. NOTE(review): the 14 presumably stands for the
 * Ethernet header length - confirm.
 */
#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
#define RX_BUFFERS_PER_QUEUE 512
#define MAX_SEND_BUFFERS_PER_QUEUE 256
#define EQ_SIZE (8 * PAGE_SIZE)
#define LOG2_EQ_THROTTLE 3
/* Size of the ports[] array in struct mana_context. */
#define MAX_PORTS_IN_MANA_DEV 16
/* Packet and byte counters of one queue. Readers (see
 * mana_get_ethtool_stats()) sample both counters inside a
 * u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() loop on syncp.
 */
struct mana_stats {
u64 packets;
u64 bytes;
struct u64_stats_sync syncp;
};
/* Per-queue transmit state. */
struct mana_txq {
struct gdma_queue *gdma_sq;
/* The queue id can be read as a whole or through its bit-fields;
 * vsq_frame covers the 14 bits that follow the first 10.
 */
union {
u32 gdma_txq_id;
struct {
u32 reserved1 : 10;
u32 vsq_frame : 14;
u32 reserved2 : 8;
};
};
u16 vp_offset;
struct net_device *ndev;
/* The SKBs are sent to the HW and we are waiting for the CQEs. */
struct sk_buff_head pending_skbs;
struct netdev_queue *net_txq;
atomic_t pending_sends;
/* Counters reported per TX queue by ethtool. */
struct mana_stats stats;
};
/* DMA mappings of an skb: one slot for the skb data plus one for each
 * possible fragment, with the mapped size of each slot kept alongside.
 */
struct mana_skb_head {
dma_addr_t dma_handle[MAX_SKB_FRAGS + 1];
u32 size[MAX_SKB_FRAGS + 1];
};
/* Number of bytes occupied by one struct mana_skb_head. */
#define MANA_HEADROOM sizeof(struct mana_skb_head)
/* TX packet format selector.
 * NOTE(review): presumably the value written to mana_tx_short_oob.pkt_fmt,
 * choosing between the short OOB alone and short + long OOB - confirm.
 */
enum mana_tx_pkt_format {
MANA_SHORT_PKT_FMT = 0,
MANA_LONG_PKT_FMT = 1,
};
/* Short form of the TX out-of-band data: two 32-bit words of bit-fields.
 * The field order and widths form the hardware layout and must not be
 * changed (this includes the spelling of the field names used by callers).
 */
struct mana_tx_short_oob {
u32 pkt_fmt : 2;
u32 is_outer_ipv4 : 1;
u32 is_outer_ipv6 : 1;
u32 comp_iphdr_csum : 1;
u32 comp_tcp_csum : 1;
u32 comp_udp_csum : 1;
u32 supress_txcqe_gen : 1;
u32 vcq_num : 24;
u32 trans_off : 10; /* Transport header offset */
u32 vsq_frame : 14;
u32 short_vp_offset : 8;
}; /* HW DATA */
/* Additional TX out-of-band data that follows the short form inside
 * struct mana_tx_oob: encapsulation flags, 802.1Q tag fields and a wider
 * (12-bit) vPort offset. The field order and widths form the hardware
 * layout and must not be changed.
 */
struct mana_tx_long_oob {
u32 is_encap : 1;
u32 inner_is_ipv6 : 1;
u32 inner_tcp_opt : 1;
u32 inject_vlan_pri_tag : 1;
u32 reserved1 : 12;
u32 pcp : 3; /* 802.1Q */
u32 dei : 1; /* 802.1Q */
u32 vlan_id : 12; /* 802.1Q */
u32 inner_frame_offset : 10;
u32 inner_ip_rel_offset : 6;
u32 long_vp_offset : 12;
u32 reserved2 : 4;
u32 reserved3;
u32 reserved4;
}; /* HW DATA */
/* Complete TX out-of-band data: the short form immediately followed by the
 * long form.
 */
struct mana_tx_oob {
struct mana_tx_short_oob s_oob;
struct mana_tx_long_oob l_oob;
}; /* HW DATA */
/* Tells whether a struct mana_cq serves a receive or a transmit queue. */
enum mana_cq_type {
MANA_CQ_TYPE_RX,
MANA_CQ_TYPE_TX,
};
/* Completion queue entry types. RX types use the values 1-4 and TX types
 * start at 32; all values fit into the 6-bit cqe_type field of
 * struct mana_cqe_header.
 */
enum mana_cqe_type {
CQE_INVALID = 0,
CQE_RX_OKAY = 1,
CQE_RX_COALESCED_4 = 2,
CQE_RX_OBJECT_FENCE = 3,
CQE_RX_TRUNCATED = 4,
CQE_TX_OKAY = 32,
CQE_TX_SA_DROP = 33,
CQE_TX_MTU_DROP = 34,
CQE_TX_INVALID_OOB = 35,
CQE_TX_INVALID_ETH_TYPE = 36,
CQE_TX_HDR_PROCESSING_ERROR = 37,
CQE_TX_VF_DISABLED = 38,
CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39,
CQE_TX_VPORT_DISABLED = 40,
CQE_TX_VLAN_TAGGING_VIOLATION = 41,
};
/* NOTE(review): presumably the expected mana_cqe_header.client_type value
 * for a completion - confirm against the CQE handlers.
 */
#define MANA_CQE_COMPLETION 1
/* First 32-bit word of both the RX and the TX completion OOB: CQE type,
 * client type and a vendor error code.
 */
struct mana_cqe_header {
u32 cqe_type : 6;
u32 client_type : 2;
u32 vendor_err : 24;
}; /* HW DATA */
/* NDIS HASH Types: one bit per hash type. The nine bits match the width of
 * the rx_hashtype field in struct mana_rxcomp_oob.
 */
#define NDIS_HASH_IPV4 BIT(0)
#define NDIS_HASH_TCP_IPV4 BIT(1)
#define NDIS_HASH_UDP_IPV4 BIT(2)
#define NDIS_HASH_IPV6 BIT(3)
#define NDIS_HASH_TCP_IPV6 BIT(4)
#define NDIS_HASH_UDP_IPV6 BIT(5)
#define NDIS_HASH_IPV6_EX BIT(6)
#define NDIS_HASH_TCP_IPV6_EX BIT(7)
#define NDIS_HASH_UDP_IPV6_EX BIT(8)
/* All hash types computed over IP addresses only (L3) ... */
#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
/* ... and all hash types that also cover TCP or UDP (L4). */
#define MANA_HASH_L4 \
(NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \
NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
/* Per-packet information inside an RX completion: packet length and hash. */
struct mana_rxcomp_perpkt_info {
u32 pkt_len : 16;
u32 reserved1 : 16;
u32 reserved2;
u32 pkt_hash;
}; /* HW DATA */
/* Number of per-packet info entries in one struct mana_rxcomp_oob. */
#define MANA_RXCOMP_OOB_NUM_PPI 4
/* Receive completion OOB: the common CQE header, one word of VLAN, hash
 * type and checksum result flags, MANA_RXCOMP_OOB_NUM_PPI per-packet
 * entries and the offset of the RX WQE. The field order and widths form
 * the hardware layout and must not be changed.
 */
struct mana_rxcomp_oob {
struct mana_cqe_header cqe_hdr;
u32 rx_vlan_id : 12;
u32 rx_vlantag_present : 1;
u32 rx_outer_iphdr_csum_succeed : 1;
u32 rx_outer_iphdr_csum_fail : 1;
u32 reserved1 : 1;
u32 rx_hashtype : 9;
u32 rx_iphdr_csum_succeed : 1;
u32 rx_iphdr_csum_fail : 1;
u32 rx_tcp_csum_succeed : 1;
u32 rx_tcp_csum_fail : 1;
u32 rx_udp_csum_succeed : 1;
u32 rx_udp_csum_fail : 1;
u32 reserved2 : 1;
struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI];
u32 rx_wqe_offset;
}; /* HW DATA */
/* Transmit completion OOB: the common CQE header followed by data, SGL and
 * WQE offsets; the remaining twelve words are reserved.
 */
struct mana_tx_comp_oob {
struct mana_cqe_header cqe_hdr;
u32 tx_data_offset;
u32 tx_sgl_offset : 5;
u32 tx_wqe_offset : 27;
u32 reserved[12];
}; /* HW DATA */
/* Forward declaration: struct mana_cq points back to its RX queue. */
struct mana_rxq;
/* Completion queue of a MANA RX or TX queue. */
struct mana_cq {
struct gdma_queue *gdma_cq;
/* Cache the CQ id (used to verify if each CQE comes to the right CQ). */
u32 gdma_id;
/* Type of the CQ: TX or RX */
enum mana_cq_type type;
/* Pointer to the mana_rxq that is pushing RX CQEs to the queue.
 * Only and must be non-NULL if type is MANA_CQ_TYPE_RX.
 */
struct mana_rxq *rxq;
/* Pointer to the mana_txq that is pushing TX CQEs to the queue.
 * Only and must be non-NULL if type is MANA_CQ_TYPE_TX.
 */
struct mana_txq *txq;
/* Pointer to a buffer which the CQ handler can copy the CQE's into. */
struct gdma_comp *gdma_comp_buf;
};
/* Number of entries in the sgl[] array of each receive buffer. */
#define GDMA_MAX_RQE_SGES 15
/* Bookkeeping for one receive buffer of an RX queue. */
struct mana_recv_buf_oob {
/* A valid GDMA work request representing the data buffer. */
struct gdma_wqe_request wqe_req;
/* CPU address and DMA address of the data buffer. */
void *buf_va;
dma_addr_t buf_dma_addr;
/* SGL of the buffer that is going to be sent as part of the work
 * request.
 */
u32 num_sge;
struct gdma_sge sgl[GDMA_MAX_RQE_SGES];
/* Required to store the result of mana_gd_post_work_request.
 * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the
 * work queue when the WQE is consumed.
 */
struct gdma_posted_wqe_info wqe_inf;
};
/* Per-queue receive state, allocated together with its trailing array of
 * receive-buffer descriptors.
 */
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
u32 gdma_id;
/* Index of RQ in the vPort, not gdma receive queue id */
u32 rxq_idx;
u32 datasize;
mana_handle_t rxobj;
/* Completion queue of this RX queue (embedded, not a pointer). */
struct mana_cq rx_cq;
struct net_device *ndev;
/* Total number of receive buffers to be allocated */
u32 num_rx_buf;
u32 buf_index;
/* Counters reported per RX queue by ethtool. */
struct mana_stats stats;
/* MUST BE THE LAST MEMBER:
 * Each receive buffer has an associated mana_recv_buf_oob.
 */
struct mana_recv_buf_oob rx_oobs[];
};
/* A transmit queue bundled with its completion queue and the handle of the
 * corresponding TX object.
 */
struct mana_tx_qp {
struct mana_txq txq;
struct mana_cq tx_cq;
mana_handle_t tx_object;
};
/* Port-wide counters exported through ethtool under the names "stop_queue"
 * and "wake_queue" (see the mana_eth_stats table in the ethtool code).
 */
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
};
/* Device-level MANA state: the GDMA device and the net devices of its
 * ports (at most MAX_PORTS_IN_MANA_DEV).
 */
struct mana_context {
struct gdma_dev *gdma_dev;
u16 num_ports;
struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
};
/* Per-port state; this is the private data of the port's net_device (the
 * ethtool callbacks obtain it with netdev_priv()).
 */
struct mana_port_context {
/* Owning device context and the net_device of this port. */
struct mana_context *ac;
struct net_device *ndev;
u8 mac_addr[ETH_ALEN];
struct mana_eq *eqs;
enum TRI_STATE rss_state;
mana_handle_t default_rxobj;
bool tx_shortform_allowed;
u16 tx_vp_offset;
/* Array of TX queue pairs, indexed by queue number. */
struct mana_tx_qp *tx_qp;
/* Indirection Table for RX & TX. The values are queue indexes */
u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
/* Indirection table containing RxObject Handles */
mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
/* Hash key used by the NIC */
u8 hashkey[MANA_HASH_KEY_SIZE];
/* This points to an array of num_queues of RQ pointers. */
struct mana_rxq **rxqs;
/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
unsigned int max_queues;
unsigned int num_queues;
mana_handle_t port_handle;
u16 port_idx;
/* The ethtool callbacks that change RSS or the channel count refuse to
 * run while this is false.
 */
bool port_is_up;
bool port_st_save; /* Saved port state */
struct mana_ethtool_stats eth_stats;
};
/* Apply the port's RSS settings. update_hash and update_tab tell whether
 * the hash key and the indirection table stored in the port context have
 * changed and need to be applied.
 */
int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx,
bool update_hash, bool update_tab);
int mana_alloc_queues(struct net_device *ndev);
/* mana_detach() followed by mana_attach() is how the ethtool code applies
 * a new queue count.
 */
int mana_attach(struct net_device *ndev);
int mana_detach(struct net_device *ndev, bool from_close);
int mana_probe(struct gdma_dev *gd);
void mana_remove(struct gdma_dev *gd);
/* ethtool operations of MANA net devices. */
extern const struct ethtool_ops mana_ethtool_ops;
/* Description of one queue: its index, GDMA region, size, the EQ it is
 * attached to and a moderation context id.
 * NOTE(review): presumably the input used to fill the wq_/cq_ fields of
 * struct mana_create_wqobj_req - confirm against the caller.
 */
struct mana_obj_spec {
u32 queue_index;
u64 gdma_region;
u32 queue_size;
u32 attached_eq;
u32 modr_ctx_id;
};
/* Command codes of the MANA requests. Several of them have matching
 * *_req / *_resp structures below.
 */
enum mana_command_code {
MANA_QUERY_DEV_CONFIG = 0x20001,
MANA_QUERY_GF_STAT = 0x20002,
MANA_CONFIG_VPORT_TX = 0x20003,
MANA_CREATE_WQ_OBJ = 0x20004,
MANA_DESTROY_WQ_OBJ = 0x20005,
MANA_FENCE_RQ = 0x20006,
MANA_CONFIG_VPORT_RX = 0x20007,
MANA_QUERY_VPORT_CONFIG = 0x20008,
};
/* Query Device Configuration
 *
 * The request carries the driver capability flags and the protocol version
 * of the driver.
 */
struct mana_query_device_cfg_req {
struct gdma_req_hdr hdr;
/* Driver Capability flags */
u64 drv_cap_flags1;
u64 drv_cap_flags2;
u64 drv_cap_flags3;
u64 drv_cap_flags4;
u32 proto_major_ver;
u32 proto_minor_ver;
u32 proto_micro_ver;
u32 reserved;
}; /* HW DATA */
/* The response carries the PF capability flags and the maximum number of
 * vPorts and EQs.
 */
struct mana_query_device_cfg_resp {
struct gdma_resp_hdr hdr;
u64 pf_cap_flags1;
u64 pf_cap_flags2;
u64 pf_cap_flags3;
u64 pf_cap_flags4;
u16 max_num_vports;
u16 reserved;
u32 max_num_eqs;
}; /* HW DATA */
/* Query vPort Configuration
 *
 * The request selects a vPort by index.
 */
struct mana_query_vport_cfg_req {
struct gdma_req_hdr hdr;
u32 vport_index;
}; /* HW DATA */
/* The response carries the queue limits of the vPort, the number of
 * indirection table entries, its MAC address and its handle.
 */
struct mana_query_vport_cfg_resp {
struct gdma_resp_hdr hdr;
u32 max_num_sq;
u32 max_num_rq;
u32 num_indirection_ent;
u32 reserved1;
u8 mac_addr[6];
u8 reserved2[2];
mana_handle_t vport;
}; /* HW DATA */
/* Configure vPort
 *
 * The request names the vPort by handle and supplies a PD id and a
 * doorbell page id.
 */
struct mana_config_vport_req {
struct gdma_req_hdr hdr;
mana_handle_t vport;
u32 pdid;
u32 doorbell_pageid;
}; /* HW DATA */
/* The response carries the TX vPort offset and tells whether the short
 * form is allowed.
 */
struct mana_config_vport_resp {
struct gdma_resp_hdr hdr;
u16 tx_vport_offset;
u8 short_form_allowed;
u8 reserved;
}; /* HW DATA */
/* Create WQ Object
 *
 * The request describes a work queue and its completion queue (GDMA
 * regions and sizes) on the given vPort.
 */
struct mana_create_wqobj_req {
struct gdma_req_hdr hdr;
mana_handle_t vport;
u32 wq_type;
u32 reserved;
u64 wq_gdma_region;
u64 cq_gdma_region;
u32 wq_size;
u32 cq_size;
u32 cq_moderation_ctx_id;
u32 cq_parent_qid;
}; /* HW DATA */
/* The response carries the ids of the created WQ and CQ and the handle of
 * the WQ object.
 */
struct mana_create_wqobj_resp {
struct gdma_resp_hdr hdr;
u32 wq_id;
u32 cq_id;
mana_handle_t wq_obj;
}; /* HW DATA */
/* Destroy WQ Object
 *
 * The request names the object by type and handle; the response consists
 * of the header only.
 */
struct mana_destroy_wqobj_req {
struct gdma_req_hdr hdr;
u32 wq_type;
u32 reserved;
mana_handle_t wq_obj_handle;
}; /* HW DATA */
struct mana_destroy_wqobj_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
/* Fence RQ
 *
 * The request names the WQ object by handle; the response consists of the
 * header only.
 */
struct mana_fence_rq_req {
struct gdma_req_hdr hdr;
mana_handle_t wq_obj_handle;
}; /* HW DATA */
struct mana_fence_rq_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
/* Configure vPort Rx Steering
 *
 * The update_* bytes indicate which of the default RX object, the hash key
 * and the indirection table are to be updated. The response consists of
 * the header only.
 */
struct mana_cfg_rx_steer_req {
struct gdma_req_hdr hdr;
mana_handle_t vport;
u16 num_indir_entries;
/* NOTE(review): presumably the offset of the indirection table relative
 * to the start of the request - confirm.
 */
u16 indir_tab_offset;
u32 rx_enable;
u32 rss_enable;
u8 update_default_rxobj;
u8 update_hashkey;
u8 update_indir_tab;
u8 reserved;
mana_handle_t default_rxobj;
u8 hashkey[MANA_HASH_KEY_SIZE];
}; /* HW DATA */
struct mana_cfg_rx_steer_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
#define MANA_MAX_NUM_QUEUES 16
/* Largest value that fits into the 8-bit short_vp_offset field of
 * struct mana_tx_short_oob.
 */
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
/* Everything needed to post one TX packet: the work request, a small
 * built-in SGL array plus a pointer to the SGL, the TX OOB and the
 * information about the posted WQE.
 * NOTE(review): sgl_ptr presumably points at sgl_array unless more than
 * five entries are needed - confirm against the xmit path.
 */
struct mana_tx_package {
struct gdma_wqe_request wqe_req;
struct gdma_sge sgl_array[5];
struct gdma_sge *sgl_ptr;
struct mana_tx_oob tx_oob;
struct gdma_posted_wqe_info wqe_info;
};
#endif /* _MANA_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,250 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include "mana.h"
/* Port-wide ethtool statistics: the name shown to user space and the offset
 * of the matching u64 counter inside struct mana_ethtool_stats.
 */
static const struct {
char name[ETH_GSTRING_LEN];
u16 offset;
} mana_eth_stats[] = {
{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
};
/* ethtool .get_sset_count: number of statistics exported by the port.
 *
 * Only ETH_SS_STATS is supported; any other string set yields -EINVAL.
 * Besides the port-wide counters, each queue contributes four values
 * (RX packets/bytes and TX packets/bytes).
 */
static int mana_get_sset_count(struct net_device *ndev, int stringset)
{
	struct mana_port_context *port = netdev_priv(ndev);
	unsigned int nqueues = port->num_queues;

	if (stringset == ETH_SS_STATS)
		return ARRAY_SIZE(mana_eth_stats) + nqueues * 4;

	return -EINVAL;
}
/* ethtool .get_strings: write the names of all exported statistics.
 *
 * @data receives one ETH_GSTRING_LEN-sized slot per statistic, in the same
 * order in which mana_get_ethtool_stats() reports the values: the port-wide
 * counters first, then packets/bytes of every RX queue, then packets/bytes
 * of every TX queue. Only ETH_SS_STATS is handled.
 *
 * The per-queue names are formatted with snprintf() so that a write can
 * never run past the slot it belongs to.
 */
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned int num_queues = apc->num_queues;
	u8 *p = data;
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) {
		memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN);
		p += ETH_GSTRING_LEN;
	}

	for (i = 0; i < num_queues; i++) {
		snprintf(p, ETH_GSTRING_LEN, "rx_%d_packets", i);
		p += ETH_GSTRING_LEN;
		snprintf(p, ETH_GSTRING_LEN, "rx_%d_bytes", i);
		p += ETH_GSTRING_LEN;
	}

	for (i = 0; i < num_queues; i++) {
		snprintf(p, ETH_GSTRING_LEN, "tx_%d_packets", i);
		p += ETH_GSTRING_LEN;
		snprintf(p, ETH_GSTRING_LEN, "tx_%d_bytes", i);
		p += ETH_GSTRING_LEN;
	}
}
/* ethtool .get_ethtool_stats: fill @data with the current counter values.
 *
 * The order matches mana_get_strings(): port-wide counters, then
 * packets/bytes per RX queue, then packets/bytes per TX queue. Nothing is
 * written while the port is down.
 */
static void mana_get_ethtool_stats(struct net_device *ndev,
				   struct ethtool_stats *e_stats, u64 *data)
{
	struct mana_port_context *port = netdev_priv(ndev);
	unsigned int nqueues = port->num_queues;
	void *port_stats = &port->eth_stats;
	struct mana_stats *qstats;
	unsigned int seq;
	u64 npkts, nbytes;
	u64 *out = data;
	int n;

	if (!port->port_is_up)
		return;

	/* Port-wide counters, located through the offsets in the table. */
	for (n = 0; n < ARRAY_SIZE(mana_eth_stats); n++)
		*out++ = *(u64 *)(port_stats + mana_eth_stats[n].offset);

	/* RX queues: re-read until a consistent packets/bytes pair is seen. */
	for (n = 0; n < nqueues; n++) {
		qstats = &port->rxqs[n]->stats;

		do {
			seq = u64_stats_fetch_begin_irq(&qstats->syncp);
			npkts = qstats->packets;
			nbytes = qstats->bytes;
		} while (u64_stats_fetch_retry_irq(&qstats->syncp, seq));

		*out++ = npkts;
		*out++ = nbytes;
	}

	/* TX queues, same scheme. */
	for (n = 0; n < nqueues; n++) {
		qstats = &port->tx_qp[n].txq.stats;

		do {
			seq = u64_stats_fetch_begin_irq(&qstats->syncp);
			npkts = qstats->packets;
			nbytes = qstats->bytes;
		} while (u64_stats_fetch_retry_irq(&qstats->syncp, seq));

		*out++ = npkts;
		*out++ = nbytes;
	}
}
/* ethtool .get_rxnfc: only ETHTOOL_GRXRINGS is supported, which reports the
 * number of queues of the port in cmd->data. Every other command yields
 * -EOPNOTSUPP.
 */
static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd,
			  u32 *rules)
{
	struct mana_port_context *port = netdev_priv(ndev);

	if (cmd->cmd != ETHTOOL_GRXRINGS)
		return -EOPNOTSUPP;

	cmd->data = port->num_queues;
	return 0;
}
/* ethtool .get_rxfh_key_size: the RSS hash key length in bytes, which is
 * the same fixed value for every MANA port.
 */
static u32 mana_get_rxfh_key_size(struct net_device *ndev)
{
	const u32 key_len = MANA_HASH_KEY_SIZE;

	return key_len;
}
/* ethtool .get_rxfh_indir_size: the number of entries of the RSS
 * indirection table, which is the same fixed value for every MANA port.
 */
static u32 mana_rss_indir_size(struct net_device *ndev)
{
	const u32 num_entries = MANA_INDIRECT_TABLE_SIZE;

	return num_entries;
}
/* ethtool .get_rxfh: report the RSS configuration of the port.
 *
 * Each output pointer may be NULL, in which case that item is skipped. The
 * hash function is always Toeplitz; the indirection table and the hash key
 * are copied from the port context. Always returns 0.
 */
static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key,
			 u8 *hfunc)
{
	struct mana_port_context *port = netdev_priv(ndev);

	if (hfunc)
		*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */

	/* indir_table holds MANA_INDIRECT_TABLE_SIZE u32 entries. */
	if (indir)
		memcpy(indir, port->indir_table, sizeof(port->indir_table));

	if (key)
		memcpy(key, port->hashkey, MANA_HASH_KEY_SIZE);

	return 0;
}
/* ethtool .set_rxfh: change the RSS indirection table and/or hash key.
 *
 * @indir and @key may each be NULL to leave that item unchanged. Only the
 * Toeplitz hash function (or "no change") is accepted. Every indirection
 * entry must name an existing queue.
 *
 * The new values are stored in the port context and applied with
 * mana_config_rss(). If that fails, the previous values are restored and
 * applied again, and the original error is returned.
 *
 * Returns 0 on success, -EOPNOTSUPP if the port is down or the hash
 * function is unsupported, -EINVAL for an out-of-range table entry, or the
 * error from mana_config_rss().
 */
static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
			 const u8 *key, const u8 hfunc)
{
	struct mana_port_context *port = netdev_priv(ndev);
	u32 old_table[MANA_INDIRECT_TABLE_SIZE];
	u8 old_key[MANA_HASH_KEY_SIZE];
	bool new_table = false;
	bool new_key = false;
	int n, ret;

	if (!port->port_is_up)
		return -EOPNOTSUPP;

	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
		return -EOPNOTSUPP;

	if (indir) {
		/* Validate everything before touching the port context. */
		for (n = 0; n < MANA_INDIRECT_TABLE_SIZE; n++) {
			if (indir[n] >= port->num_queues)
				return -EINVAL;
		}

		new_table = true;
		memcpy(old_table, port->indir_table, sizeof(old_table));
		memcpy(port->indir_table, indir, sizeof(port->indir_table));
	}

	if (key) {
		new_key = true;
		memcpy(old_key, port->hashkey, MANA_HASH_KEY_SIZE);
		memcpy(port->hashkey, key, MANA_HASH_KEY_SIZE);
	}

	ret = mana_config_rss(port, TRI_STATE_TRUE, new_key, new_table);
	if (!ret)
		return 0;

	/* recover to original values */
	if (new_table)
		memcpy(port->indir_table, old_table, sizeof(old_table));

	if (new_key)
		memcpy(port->hashkey, old_key, MANA_HASH_KEY_SIZE);

	mana_config_rss(port, TRI_STATE_TRUE, new_key, new_table);

	return ret;
}
static void mana_get_channels(struct net_device *ndev,
struct ethtool_channels *channel)
{
struct mana_port_context *apc = netdev_priv(ndev);
channel->max_combined = apc->max_queues;
channel->combined_count = apc->num_queues;
}
/* ethtool .set_channels: change the number of combined channels (queues).
 *
 * The port is detached, num_queues is updated and the port is attached
 * again. If attaching with the new count fails, the old count is restored
 * and a second attach is attempted; the error of the first attach is
 * returned either way.
 *
 * A request for the count that is already in use returns 0 right away, so
 * that traffic is not interrupted by a pointless detach/attach cycle.
 *
 * Returns 0 on success, -EOPNOTSUPP if the port is down, or the error from
 * mana_detach()/mana_attach().
 */
static int mana_set_channels(struct net_device *ndev,
			     struct ethtool_channels *channels)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned int new_count = channels->combined_count;
	unsigned int old_count = apc->num_queues;
	int err, err2;

	if (!apc->port_is_up)
		return -EOPNOTSUPP;

	/* Nothing to reconfigure. */
	if (new_count == old_count)
		return 0;

	err = mana_detach(ndev, false);
	if (err) {
		netdev_err(ndev, "mana_detach failed: %d\n", err);
		return err;
	}

	apc->num_queues = new_count;

	err = mana_attach(ndev);
	if (!err)
		return 0;

	netdev_err(ndev, "mana_attach failed: %d\n", err);

	/* Try to roll it back to the old configuration. */
	apc->num_queues = old_count;
	err2 = mana_attach(ndev);
	if (err2)
		netdev_err(ndev, "mana re-attach failed: %d\n", err2);

	return err;
}
/* ethtool operations of MANA net devices: statistics, RX ring count, RSS
 * key/indirection table access and channel (queue) count control.
 */
const struct ethtool_ops mana_ethtool_ops = {
.get_ethtool_stats = mana_get_ethtool_stats,
.get_sset_count = mana_get_sset_count,
.get_strings = mana_get_strings,
.get_rxnfc = mana_get_rxnfc,
.get_rxfh_key_size = mana_get_rxfh_key_size,
.get_rxfh_indir_size = mana_rss_indir_size,
.get_rxfh = mana_get_rxfh,
.set_rxfh = mana_set_rxfh,
.get_channels = mana_get_channels,
.set_channels = mana_set_channels,
};

View File

@ -0,0 +1,291 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "shm_channel.h"
/* A page frame number is transferred as its lower 48 bits (6 bytes) plus
 * its upper 4 bits; see the packing scheme in mana_smc_setup_hwc().
 */
#define PAGE_FRAME_L48_WIDTH_BYTES 6
#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8)
#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF
#define PAGE_FRAME_H4_WIDTH_BITS 4
/* The EQ MSI-X index must fit into 16 bits. */
#define VECTOR_MASK 0xFFFF
/* Value read from the last dword of the shared memory in the reset case. */
#define SHMEM_VF_RESET_STATE ((u32)-1)
/* Message types and versions placed in smc_proto_hdr. */
#define SMC_MSG_TYPE_ESTABLISH_HWC 1
#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0
#define SMC_MSG_TYPE_DESTROY_HWC 2
#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0
/* Values of smc_proto_hdr.direction. */
#define SMC_MSG_DIRECTION_REQUEST 0
#define SMC_MSG_DIRECTION_RESPONSE 1
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
 * them are naturally aligned and hence don't need __packed.
 */
/* Shared memory channel protocol header
 *
 * msg_type: set on request and response; response matches request.
 * msg_version: a newer PF writes back the older version in its response
 *              (matching the request); an older PF acts on the latest
 *              version it knows and sets that version in the result (less
 *              than the request).
 * direction: 0 for request, VF->PF; 1 for response, PF->VF.
 * status: 0 on request,
 *         operation result on response (success = 0, failure = 1 or greater).
 * reset_vf: If set on either establish or destroy request, indicates perform
 *           FLR before/after the operation.
 * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned.
 *
 * This header occupies the last dword of the shared memory region; the
 * driver tests bit 31 of that dword to find out whether the PF owns the
 * region.
 */
union smc_proto_hdr {
u32 as_uint32;
struct {
u8 msg_type : 3;
u8 msg_version : 3;
u8 reserved_1 : 1;
u8 direction : 1;
u8 status;
u8 reserved_2;
u8 reset_vf : 1;
u8 reserved_3 : 6;
u8 owner_is_pf : 1;
};
}; /* HW DATA */
/* The shared memory region is 256 bits wide and is accessed in 32-bit
 * units, i.e. as 8 dwords; the last one holds the protocol header.
 */
#define SMC_APERTURE_BITS 256
#define SMC_BASIC_UNIT (sizeof(u32))
#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8))
#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1)
/* Wait until the VF owns the shared memory region.
 *
 * @base:  start of the shared memory region
 * @reset: if true, reading the reset pattern (all ones) also counts as
 *         success
 *
 * The last dword is polled up to 20000 times with a sleep of 1-2 ms between
 * attempts. Returns 0 once the ownership bit (bit 31) is clear or the reset
 * pattern is accepted, -ETIMEDOUT otherwise.
 */
static int mana_smc_poll_register(void __iomem *base, bool reset)
{
	void __iomem *hdr_reg = base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
	unsigned int attempts = 20 * 1000;
	u32 val;

	/* This should be pretty fast, but the hardware or the PF driver may
	 * be temporarily busy, hence the generous retry budget.
	 */
	while (attempts--) {
		val = readl(hdr_reg);

		/* shmem reads as 0xFFFFFFFF in the reset case; otherwise a
		 * set bit 31 means the PF currently owns the SMC.
		 */
		if ((reset && val == SHMEM_VF_RESET_STATE) ||
		    !(val & BIT(31)))
			return 0;

		usleep_range(1000, 2000);
	}

	return -ETIMEDOUT;
}
/* Wait for the PF's response in the shared memory and validate it.
 *
 * @msg_type and @msg_version are those of the request that was sent. With
 * @reset_vf set, the reset pattern is accepted as a successful response.
 *
 * Returns 0 on success, the polling error if the PF does not hand the
 * region back in time, or -EPROTO if the response does not match the
 * request or reports a non-zero status.
 */
static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type,
				  u32 msg_version, bool reset_vf)
{
	void __iomem *hdr_reg = sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
	union smc_proto_hdr resp;
	bool mismatch;
	int ret;

	/* Wait for PF to respond. */
	ret = mana_smc_poll_register(sc->base, reset_vf);
	if (ret)
		return ret;

	resp.as_uint32 = readl(hdr_reg);

	if (reset_vf && resp.as_uint32 == SHMEM_VF_RESET_STATE)
		return 0;

	/* Validate protocol fields from the PF driver */
	mismatch = resp.msg_type != msg_type ||
		   resp.msg_version > msg_version ||
		   resp.direction != SMC_MSG_DIRECTION_RESPONSE;
	if (mismatch) {
		dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n",
			resp.as_uint32, msg_type, msg_version);
		return -EPROTO;
	}

	/* Validate the operation result */
	if (resp.status != 0) {
		dev_err(sc->dev, "SMC operation failed: 0x%x\n", resp.status);
		return -EPROTO;
	}

	return 0;
}
/* Initialize a shared memory channel with the device used for logging and
 * the mapped base address of the shared memory region.
 */
void mana_smc_init(struct shm_channel *sc, struct device *dev,
		   void __iomem *base)
{
	sc->base = base;
	sc->dev = dev;
}
/* Ask the PF, through the shared memory channel, to establish the HWC.
 *
 * @sc:            shared memory channel
 * @reset_vf:      value of the reset_vf bit of the request
 * @eq_addr, @cq_addr, @rq_addr, @sq_addr:
 *                 addresses of the four HWC queues; each must be page
 *                 aligned
 * @eq_msix_index: MSI-X vector of the EQ; must fit into 16 bits
 *
 * Returns 0 on success, -EINVAL for a misaligned address or an oversized
 * vector index, the polling error if the VF does not own the region, or
 * the error from reading the PF's response.
 */
int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
		       u64 cq_addr, u64 rq_addr, u64 sq_addr,
		       u32 eq_msix_index)
{
	/* The order of this array is the order used in the message. */
	const u64 queue_addrs[] = { eq_addr, cq_addr, rq_addr, sq_addr };
	union smc_proto_hdr *hdr;
	u16 all_addr_h4bits = 0;
	u8 shm_buf[32];
	u8 *cursor = shm_buf;
	u64 pfn;
	int err;
	int i;

	/* Ensure VF already has possession of shared memory */
	err = mana_smc_poll_register(sc->base, false);
	if (err) {
		dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err);
		return err;
	}

	for (i = 0; i < ARRAY_SIZE(queue_addrs); i++) {
		if (!PAGE_ALIGNED(queue_addrs[i]))
			return -EINVAL;
	}

	if (eq_msix_index > VECTOR_MASK)
		return -EINVAL;

	/* Scheme for packing four addresses and extra info into 256 bits.
	 *
	 * Addresses must be page frame aligned, so only frame address bits
	 * are transferred.
	 *
	 * 52-bit frame addresses are split into the lower 48 bits and upper
	 * 4 bits. Lower 48 bits of 4 address are written sequentially from
	 * the start of the 256-bit shared memory region followed by 16 bits
	 * containing the upper 4 bits of the 4 addresses in sequence.
	 *
	 * A 16 bit EQ vector number fills out the next-to-last 32-bit dword.
	 *
	 * The final 32-bit dword is used for protocol control information as
	 * defined in smc_proto_hdr.
	 */
	memset(shm_buf, 0, sizeof(shm_buf));

	for (i = 0; i < ARRAY_SIZE(queue_addrs); i++) {
		pfn = PHYS_PFN(queue_addrs[i]);

		/* A full 64-bit store is used for a 48-bit value; the two
		 * excess (zero) bytes are overwritten by whatever is stored
		 * next, so the stores have to stay in this order.
		 */
		*(u64 *)cursor = pfn & PAGE_FRAME_L48_MASK;
		all_addr_h4bits |= (pfn >> PAGE_FRAME_L48_WIDTH_BITS) <<
				   (i * PAGE_FRAME_H4_WIDTH_BITS);
		cursor += PAGE_FRAME_L48_WIDTH_BYTES;
	}

	/* High 4 bits of the four frame addresses */
	*(u16 *)cursor = all_addr_h4bits;
	cursor += sizeof(u16);

	/* EQ MSIX vector number */
	*(u16 *)cursor = (u16)eq_msix_index;
	cursor += sizeof(u16);

	/* 32-bit protocol header in final dword */
	*(u32 *)cursor = 0;

	hdr = (union smc_proto_hdr *)cursor;
	hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC;
	hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION;
	hdr->direction = SMC_MSG_DIRECTION_REQUEST;
	hdr->reset_vf = reset_vf;

	/* Write 256-message buffer to shared memory (final 32-bit write
	 * triggers HW to set possession bit to PF).
	 */
	for (i = 0; i < SMC_APERTURE_DWORDS; i++)
		writel(((u32 *)shm_buf)[i], sc->base + i * SMC_BASIC_UNIT);

	/* Read shmem response (polling for VF possession) and validate.
	 * For setup, waiting for response on shared memory is not strictly
	 * necessary, since wait occurs later for results to appear in EQE's.
	 */
	err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC,
				     SMC_MSG_TYPE_ESTABLISH_HWC_VERSION,
				     reset_vf);
	if (err)
		dev_err(sc->dev, "Error when setting up HWC: %d\n", err);

	return err;
}
/* Ask the PF, through the shared memory channel, to destroy the HWC.
 *
 * @sc:       shared memory channel
 * @reset_vf: value of the reset_vf bit of the request
 *
 * Returns 0 on success, the polling error if the VF does not own the shared
 * memory, or the error from reading the PF's response.
 */
int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf)
{
	void __iomem *hdr_reg = sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
	union smc_proto_hdr req = {};
	int ret;

	/* Ensure already has possession of shared memory */
	ret = mana_smc_poll_register(sc->base, false);
	if (ret) {
		dev_err(sc->dev, "Timeout when tearing down HWC\n");
		return ret;
	}

	/* Set up protocol header for HWC destroy message */
	req.msg_type = SMC_MSG_TYPE_DESTROY_HWC;
	req.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION;
	req.direction = SMC_MSG_DIRECTION_REQUEST;
	req.reset_vf = reset_vf;

	/* Write message in high 32 bits of 256-bit shared memory, causing HW
	 * to set possession bit to PF.
	 */
	writel(req.as_uint32, hdr_reg);

	/* Read shmem response (polling for VF possession) and validate.
	 * For teardown, waiting for response is required to ensure hardware
	 * invalidates MST entries before software frees memory.
	 */
	ret = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC,
				     SMC_MSG_TYPE_DESTROY_HWC_VERSION,
				     reset_vf);
	if (ret)
		dev_err(sc->dev, "Error when tearing down HWC: %d\n", ret);

	return ret;
}

View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/* Copyright (c) 2021, Microsoft Corporation. */
#ifndef _SHM_CHANNEL_H
#define _SHM_CHANNEL_H
/* Shared memory channel between the VF driver and the PF. */
struct shm_channel {
/* Device used for log messages. */
struct device *dev;
/* Mapped start of the 256-bit shared memory region. */
void __iomem *base;
};
/* Record dev and base in sc; no hardware access is performed. */
void mana_smc_init(struct shm_channel *sc, struct device *dev,
void __iomem *base);
/* Send the "establish HWC" message with the addresses of the four HWC
 * queues and the EQ's MSI-X index, then validate the PF's response.
 * Returns 0 or a negative errno.
 */
int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
u64 cq_addr, u64 rq_addr, u64 sq_addr,
u32 eq_msix_index);
/* Send the "destroy HWC" message and validate the PF's response.
 * Returns 0 or a negative errno.
 */
int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf);
#endif /* _SHM_CHANNEL_H */