diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 64eb0442c82f..e214b51d3c8b 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -47,6 +47,10 @@
 
 #define GVE_RX_BUFFER_SIZE_DQO 2048
 
+#define GVE_XDP_ACTIONS 5
+
+#define GVE_TX_MAX_HEADER_SIZE 182
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -230,7 +234,10 @@ struct gve_rx_ring {
 	u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
 	u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
 	u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */
-
+	u64 xdp_tx_errors;
+	u64 xdp_redirect_errors;
+	u64 xdp_alloc_fails;
+	u64 xdp_actions[GVE_XDP_ACTIONS];
 	u32 q_num; /* queue index */
 	u32 ntfy_id; /* notification block index */
 	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
@@ -238,6 +245,12 @@ struct gve_rx_ring {
 	struct u64_stats_sync statss; /* sync stats for 32bit archs */
 
 	struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */
+
+	/* XDP stuff */
+	struct xdp_rxq_info xdp_rxq;
+	struct xdp_rxq_info xsk_rxq;
+	struct xsk_buff_pool *xsk_pool;
+	struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
 };
 
 /* A TX desc ring entry */
@@ -258,7 +271,14 @@ struct gve_tx_iovec {
  * ring entry but only used for a pkt_desc not a seg_desc
  */
 struct gve_tx_buffer_state {
-	struct sk_buff *skb; /* skb for this pkt */
+	union {
+		struct sk_buff *skb; /* skb for this pkt */
+		struct xdp_frame *xdp_frame; /* xdp_frame */
+	};
+	struct {
+		u16 size; /* size of xmitted xdp pkt */
+		u8 is_xsk; /* xsk buff */
+	} xdp;
 	union {
 		struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
 		struct {
@@ -373,6 +393,8 @@ struct gve_tx_ring {
 	struct {
 		/* Spinlock for when cleanup in progress */
 		spinlock_t clean_lock;
+		/* Spinlock for XDP tx traffic */
+		spinlock_t xdp_lock;
 	};
 
 	/* DQO fields. */
@@ -450,6 +472,12 @@ struct gve_tx_ring {
 	dma_addr_t q_resources_bus; /* dma address of the queue resources */
 	dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
 	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+	struct xsk_buff_pool *xsk_pool;
+	u32 xdp_xsk_wakeup;
+	u32 xdp_xsk_done;
+	u64 xdp_xsk_sent;
+	u64 xdp_xmit;
+	u64 xdp_xmit_errors;
 } ____cacheline_aligned;
 
 /* Wraps the info for one irq including the napi struct and the queues
@@ -526,9 +554,11 @@ struct gve_priv {
 	u16 rx_data_slot_cnt; /* rx buffer length */
 	u64 max_registered_pages;
 	u64 num_registered_pages; /* num pages registered with NIC */
+	struct bpf_prog *xdp_prog; /* XDP BPF program */
 	u32 rx_copybreak; /* copy packets smaller than this */
 	u16 default_num_queues; /* default num queues to set up */
+	u16 num_xdp_queues;
 
 	struct gve_queue_config tx_cfg;
 	struct gve_queue_config rx_cfg;
 	struct gve_qpl_config qpl_cfg; /* map used QPL ids */
@@ -785,7 +815,17 @@ static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
 	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
 		return 0;
 
-	return priv->tx_cfg.num_queues;
+	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+/* Returns the number of XDP tx queue page lists
+ */
+static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
+{
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+		return 0;
+
+	return priv->num_xdp_queues;
 }
 
 /* Returns the number of rx queue page lists
@@ -798,16 +838,35 @@ static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
 	return priv->rx_cfg.num_queues;
 }
 
+static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
+{
+	return tx_qid;
+}
+
+static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
+{
+	return priv->tx_cfg.max_queues + rx_qid;
+}
+
+static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
+{
+	return gve_tx_qpl_id(priv, 0);
+}
+
+static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv)
+{
+	return gve_rx_qpl_id(priv, 0);
+}
+
 /* Returns a pointer to the next available tx qpl in the list of qpls
  */
 static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid)
 {
-	int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
-				     priv->qpl_cfg.qpl_map_size);
+	int id = gve_tx_qpl_id(priv, tx_qid);
 
-	/* we are out of tx qpls */
-	if (id >= gve_num_tx_qpls(priv))
+	/* QPL already in use */
+	if (test_bit(id, priv->qpl_cfg.qpl_id_map))
 		return NULL;
 
 	set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -817,14 +876,12 @@ struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
 /* Returns a pointer to the next available rx qpl in the list of qpls
  */
 static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid)
 {
-	int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
-				    priv->qpl_cfg.qpl_map_size,
-				    gve_num_tx_qpls(priv));
+	int id = gve_rx_qpl_id(priv, rx_qid);
 
-	/* we are out of rx qpls */
-	if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv))
+	/* QPL already in use */
+	if (test_bit(id, priv->qpl_cfg.qpl_id_map))
 		return NULL;
 
 	set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -843,7 +900,7 @@ static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
 static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
 						      int id)
 {
-	if (id < gve_num_tx_qpls(priv))
+	if (id < gve_rx_start_qpl_id(priv))
 		return DMA_TO_DEVICE;
 	else
 		return DMA_FROM_DEVICE;
@@ -855,6 +912,21 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
 		priv->queue_format == GVE_GQI_QPL_FORMAT;
 }
 
+static inline u32 gve_num_tx_queues(struct gve_priv *priv)
+{
+	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
+{
+	return priv->tx_cfg.num_queues + queue_id;
+}
+
+static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
+{
+	return gve_xdp_tx_queue_id(priv, 0);
+}
+
 /* buffers */
 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
@@ -863,9 +935,15 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 		   enum dma_data_direction);
 /* tx handling */
 netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		 u32 flags);
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+		     void *data, int len, void *frame_p);
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
 bool gve_tx_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv);
-void gve_tx_free_rings_gqi(struct gve_priv *priv);
+bool gve_xdp_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
+void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
 u32 gve_tx_load_event_counter(struct gve_priv *priv,
 			      struct gve_tx_ring *tx);
 bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
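The QPL helpers above trade the old first-fit bitmap search for a fixed layout: every TX queue (XDP queues included) owns the QPL whose id equals its queue id, and RX QPL ids start at tx_cfg.max_queues. A standalone userspace sketch of that arithmetic, with hypothetical queue counts that are not taken from the patch:

	/* Userspace sketch of the fixed QPL id layout above
	 * (illustrative counts, not from the patch).
	 */
	#include <assert.h>

	#define TX_MAX 16	/* stands in for priv->tx_cfg.max_queues */

	static int tx_qpl_id(int tx_qid) { return tx_qid; }
	static int rx_qpl_id(int rx_qid) { return TX_MAX + rx_qid; }

	int main(void)
	{
		/* TX queue 2 maps 1:1 (an XDP queue would sit at
		 * tx_cfg.num_queues + i, still inside [0, TX_MAX))
		 */
		assert(tx_qpl_id(2) == 2);
		/* RX queues start right after the whole TX range */
		assert(rx_qpl_id(3) == 19);
		/* mirrors gve_qpl_dma_dir(): id below the rx start
		 * means DMA_TO_DEVICE, otherwise DMA_FROM_DEVICE
		 */
		assert(tx_qpl_id(2) < rx_qpl_id(0));
		return 0;
	}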
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 60061288ad9d..252974202a3f 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -516,12 +516,12 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
 	return gve_adminq_issue_cmd(priv, &cmd);
 }
 
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
 {
 	int err;
 	int i;
 
-	for (i = 0; i < num_queues; i++) {
+	for (i = start_id; i < start_id + num_queues; i++) {
 		err = gve_adminq_create_tx_queue(priv, i);
 		if (err)
 			return err;
@@ -604,12 +604,12 @@ static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
 	return 0;
 }
 
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
 {
 	int err;
 	int i;
 
-	for (i = 0; i < num_queues; i++) {
+	for (i = start_id; i < start_id + num_queues; i++) {
 		err = gve_adminq_destroy_tx_queue(priv, i);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index cf29662e6ad1..f894beb3deaf 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -410,8 +410,8 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
 					  dma_addr_t db_array_bus_addr,
 					  u32 num_ntfy_blks);
 int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
 int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
 int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
 int gve_adminq_register_page_list(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index ce574d097e28..b18804e934d3 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -34,6 +34,11 @@ static u32 gve_get_msglevel(struct net_device *netdev)
 	return priv->msg_enable;
 }
 
+/* For the following stats column string names, make sure the order
+ * matches how it is filled in the code. For xdp_aborted, xdp_drop,
+ * xdp_pass, xdp_tx, xdp_redirect, make sure it also matches the order
+ * as declared in enum xdp_action inside file uapi/linux/bpf.h .
+ */
 static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
 	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
 	"rx_dropped", "tx_dropped", "tx_timeouts",
@@ -49,12 +54,16 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
 	"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
 	"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
 	"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+	"rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]",
+	"rx_xdp_tx[%u]", "rx_xdp_redirect[%u]",
+	"rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", "rx_xdp_alloc_fails[%u]",
 };
 
 static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
 	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
 	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
-	"tx_dma_mapping_error[%u]",
+	"tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
+	"tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
 };
 
 static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
@@ -81,8 +90,10 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct gve_priv *priv = netdev_priv(netdev);
 	char *s = (char *)data;
+	int num_tx_queues;
 	int i, j;
 
+	num_tx_queues = gve_num_tx_queues(priv);
 	switch (stringset) {
 	case ETH_SS_STATS:
 		memcpy(s, *gve_gstrings_main_stats,
@@ -97,7 +108,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 			}
 		}
 
-		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		for (i = 0; i < num_tx_queues; i++) {
 			for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
 				snprintf(s, ETH_GSTRING_LEN,
 					 gve_gstrings_tx_stats[j], i);
@@ -124,12 +135,14 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 static int gve_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct gve_priv *priv = netdev_priv(netdev);
+	int num_tx_queues;
 
+	num_tx_queues = gve_num_tx_queues(priv);
 	switch (sset) {
 	case ETH_SS_STATS:
 		return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN +
 		       (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
-		       (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+		       (num_tx_queues * NUM_GVE_TX_CNTS);
 	case ETH_SS_PRIV_FLAGS:
 		return GVE_PRIV_FLAGS_STR_LEN;
 	default:
@@ -153,18 +166,20 @@ gve_get_ethtool_stats(struct net_device *netdev,
 	struct gve_priv *priv;
 	bool skip_nic_stats;
 	unsigned int start;
+	int num_tx_queues;
 	int ring;
 	int i, j;
 
 	ASSERT_RTNL();
 
 	priv = netdev_priv(netdev);
+	num_tx_queues = gve_num_tx_queues(priv);
 	report_stats = priv->stats_report->stats;
 	rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues,
 					    sizeof(int), GFP_KERNEL);
 	if (!rx_qid_to_stats_idx)
 		return;
-	tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues,
+	tx_qid_to_stats_idx = kmalloc_array(num_tx_queues,
 					    sizeof(int), GFP_KERNEL);
 	if (!tx_qid_to_stats_idx) {
 		kfree(rx_qid_to_stats_idx);
@@ -195,7 +210,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
 		}
 	}
 	for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
-	     ring < priv->tx_cfg.num_queues; ring++) {
+	     ring < num_tx_queues; ring++) {
 		if (priv->tx) {
 			do {
 				start =
@@ -232,7 +247,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
 	i = GVE_MAIN_STATS_LEN;
 
 	/* For rx cross-reporting stats, start from nic rx stats in report */
-	base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+	base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues +
 		GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
 	max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
 		base_stats_idx;
@@ -283,14 +298,26 @@ gve_get_ethtool_stats(struct net_device *netdev,
 			if (skip_nic_stats) {
 				/* skip NIC rx stats */
 				i += NIC_RX_STATS_REPORT_NUM;
-				continue;
-			}
-			for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
-				u64 value =
-				be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value);
+			} else {
+				stats_idx = rx_qid_to_stats_idx[ring];
+				for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
+					u64 value =
+						be64_to_cpu(report_stats[stats_idx + j].value);
 
-				data[i++] = value;
+					data[i++] = value;
+				}
 			}
+			/* XDP rx counters */
+			do {
+				start = u64_stats_fetch_begin(&priv->rx[ring].statss);
+				for (j = 0; j < GVE_XDP_ACTIONS; j++)
+					data[i + j] = rx->xdp_actions[j];
+				data[i + j++] = rx->xdp_tx_errors;
+				data[i + j++] = rx->xdp_redirect_errors;
+				data[i + j++] = rx->xdp_alloc_fails;
+			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+						       start));
+			i += GVE_XDP_ACTIONS + 3; /* XDP rx counters */
 		}
 	} else {
 		i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
@@ -298,7 +325,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
 
 	/* For tx cross-reporting stats, start from nic tx stats in report */
 	base_stats_idx = max_stats_idx;
-	max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+	max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues +
 		max_stats_idx;
 	/* Preprocess the stats report for tx, map queue id to start index */
 	skip_nic_stats = false;
@@ -316,7 +343,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
 	}
 	/* walk TX rings */
 	if (priv->tx) {
-		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+		for (ring = 0; ring < num_tx_queues; ring++) {
 			struct gve_tx_ring *tx = &priv->tx[ring];
 
 			if (gve_is_gqi(priv)) {
@@ -346,16 +373,28 @@ gve_get_ethtool_stats(struct net_device *netdev,
 			if (skip_nic_stats) {
 				/* skip NIC tx stats */
 				i += NIC_TX_STATS_REPORT_NUM;
-				continue;
-			}
-			for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
-				u64 value =
-				be64_to_cpu(report_stats[tx_qid_to_stats_idx[ring] + j].value);
-				data[i++] = value;
+			} else {
+				stats_idx = tx_qid_to_stats_idx[ring];
+				for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
+					u64 value =
+						be64_to_cpu(report_stats[stats_idx + j].value);
+					data[i++] = value;
+				}
 			}
+			/* XDP xsk counters */
+			data[i++] = tx->xdp_xsk_wakeup;
+			data[i++] = tx->xdp_xsk_done;
+			do {
+				start = u64_stats_fetch_begin(&priv->tx[ring].statss);
+				data[i] = tx->xdp_xsk_sent;
+				data[i + 1] = tx->xdp_xmit;
+				data[i + 2] = tx->xdp_xmit_errors;
+			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+						       start));
+			i += 3; /* XDP tx counters */
 		}
 	} else {
-		i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+		i += num_tx_queues * NUM_GVE_TX_CNTS;
 	}
 
 	kfree(rx_qid_to_stats_idx);
@@ -412,6 +451,12 @@ static int gve_set_channels(struct net_device *netdev,
 	if (!new_rx || !new_tx)
 		return -EINVAL;
 
+	if (priv->num_xdp_queues &&
+	    (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
+		dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
+		return -EINVAL;
+	}
+
 	if (!netif_carrier_ok(netdev)) {
 		priv->tx_cfg.num_queues = new_tx;
 		priv->rx_cfg.num_queues = new_rx;
@@ -502,7 +547,9 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
 {
 	struct gve_priv *priv = netdev_priv(netdev);
 	u64 ori_flags, new_flags;
+	int num_tx_queues;
 
+	num_tx_queues = gve_num_tx_queues(priv);
 	ori_flags = READ_ONCE(priv->ethtool_flags);
 	new_flags = ori_flags;
 
@@ -522,7 +569,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
 	/* delete report stats timer. */
 	if (!(flags & BIT(0)) && (ori_flags & BIT(0))) {
 		int tx_stats_num = GVE_TX_STATS_REPORT_NUM *
-			priv->tx_cfg.num_queues;
+			num_tx_queues;
 		int rx_stats_num = GVE_RX_STATS_REPORT_NUM *
 			priv->rx_cfg.num_queues;
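The comment above gve_gstrings_main_stats leans on enum xdp_action keeping its current uapi values. If that invariant ever needed to be enforced rather than documented, a compile-time guard along these lines would do (a sketch, not part of this patch; it assumes gve.h is in scope for GVE_XDP_ACTIONS):

	/* Sketch: machine-check the "order matches enum xdp_action"
	 * comment at compile time (not part of the patch).
	 */
	#include <linux/bpf.h>
	#include <linux/build_bug.h>
	#include "gve.h"

	static inline void gve_assert_xdp_stat_order(void)
	{
		/* rx_xdp_* strings are emitted as aborted, drop,
		 * pass, tx, redirect - the uapi enum order
		 */
		BUILD_BUG_ON(XDP_ABORTED != 0);
		BUILD_BUG_ON(XDP_DROP != 1);
		BUILD_BUG_ON(XDP_PASS != 2);
		BUILD_BUG_ON(XDP_TX != 3);
		BUILD_BUG_ON(XDP_REDIRECT != 4);
		BUILD_BUG_ON(GVE_XDP_ACTIONS != XDP_REDIRECT + 1);
	}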
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 07111c241e0e..57ce74315eba 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -4,8 +4,10 @@
  * Copyright (C) 2015-2021 Google, Inc.
  */
 
+#include <linux/bpf.h>
 #include <linux/cpumask.h>
 #include <linux/etherdevice.h>
+#include <linux/filter.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -15,6 +17,7 @@
 #include <linux/utsname.h>
 #include <linux/version.h>
 #include <net/sch_generic.h>
+#include <net/xdp_sock_drv.h>
 #include "gve.h"
 #include "gve_dqo.h"
 #include "gve_adminq.h"
@@ -90,8 +93,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
 	struct gve_priv *priv = netdev_priv(dev);
 	unsigned int start;
 	u64 packets, bytes;
+	int num_tx_queues;
 	int ring;
 
+	num_tx_queues = gve_num_tx_queues(priv);
 	if (priv->rx) {
 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
 			do {
@@ -106,7 +111,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
 		}
 	}
 	if (priv->tx) {
-		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+		for (ring = 0; ring < num_tx_queues; ring++) {
 			do {
 				start =
 				u64_stats_fetch_begin(&priv->tx[ring].statss);
@@ -180,7 +185,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
 	int tx_stats_num, rx_stats_num;
 
 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
-		       priv->tx_cfg.num_queues;
+		       gve_num_tx_queues(priv);
 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
 		       priv->rx_cfg.num_queues;
 	priv->stats_report_len = struct_size(priv->stats_report, stats,
@@ -245,8 +250,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 	block = container_of(napi, struct gve_notify_block, napi);
 	priv = block->priv;
 
-	if (block->tx)
-		reschedule |= gve_tx_poll(block, budget);
+	if (block->tx) {
+		if (block->tx->q_num < priv->tx_cfg.num_queues)
+			reschedule |= gve_tx_poll(block, budget);
+		else
+			reschedule |= gve_xdp_poll(block, budget);
+	}
+
 	if (block->rx) {
 		work_done = gve_rx_poll(block, budget);
 		reschedule |= work_done == budget;
@@ -580,13 +590,14 @@ static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
 	netif_napi_del(&block->napi);
 }
 
-static int gve_register_qpls(struct gve_priv *priv)
+static int gve_register_xdp_qpls(struct gve_priv *priv)
 {
-	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int start_id;
 	int err;
 	int i;
 
-	for (i = 0; i < num_qpls; i++) {
+	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
 		if (err) {
 			netif_err(priv, drv, priv->dev,
@@ -601,13 +612,50 @@ static int gve_register_qpls(struct gve_priv *priv)
 	return 0;
 }
 
-static int gve_unregister_qpls(struct gve_priv *priv)
+static int gve_register_qpls(struct gve_priv *priv)
 {
-	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int start_id;
 	int err;
 	int i;
 
-	for (i = 0; i < num_qpls; i++) {
+	start_id = gve_tx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
+		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "failed to register queue page list %d\n",
+				  priv->qpls[i].id);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+	}
+
+	start_id = gve_rx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
+		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "failed to register queue page list %d\n",
+				  priv->qpls[i].id);
+			/* This failure will trigger a reset - no need to clean
+			 * up
+			 */
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int gve_unregister_xdp_qpls(struct gve_priv *priv)
+{
+	int start_id;
+	int err;
+	int i;
+
+	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
 		/* This failure will trigger a reset - no need to clean up */
 		if (err) {
@@ -620,22 +668,76 @@ static int gve_unregister_qpls(struct gve_priv *priv)
 	return 0;
 }
 
-static int gve_create_rings(struct gve_priv *priv)
+static int gve_unregister_qpls(struct gve_priv *priv)
 {
+	int start_id;
 	int err;
 	int i;
 
-	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
+	start_id = gve_tx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
+		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+		/* This failure will trigger a reset - no need to clean up */
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to unregister queue page list %d\n",
+				  priv->qpls[i].id);
+			return err;
+		}
+	}
+
+	start_id = gve_rx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
+		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+		/* This failure will trigger a reset - no need to clean up */
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to unregister queue page list %d\n",
+				  priv->qpls[i].id);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int gve_create_xdp_rings(struct gve_priv *priv)
+{
+	int err;
+
+	err = gve_adminq_create_tx_queues(priv,
+					  gve_xdp_tx_start_queue_id(priv),
+					  priv->num_xdp_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
+			  priv->num_xdp_queues);
+		/* This failure will trigger a reset - no need to clean
+		 * up
+		 */
+		return err;
+	}
+	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
+		  priv->num_xdp_queues);
+
+	return 0;
+}
+
+static int gve_create_rings(struct gve_priv *priv)
+{
+	int num_tx_queues = gve_num_tx_queues(priv);
+	int err;
+	int i;
+
+	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
 	if (err) {
 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
-			  priv->tx_cfg.num_queues);
+			  num_tx_queues);
 		/* This failure will trigger a reset - no need to clean
 		 * up
 		 */
 		return err;
 	}
 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
-		  priv->tx_cfg.num_queues);
+		  num_tx_queues);
 
 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
 	if (err) {
@@ -668,6 +770,23 @@ static int gve_create_rings(struct gve_priv *priv)
 	return 0;
 }
 
+static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
+					 int (*napi_poll)(struct napi_struct *napi,
+							  int budget))
+{
+	int start_id = gve_xdp_tx_start_queue_id(priv);
+	int i;
+
+	/* Add xdp tx napi & init sync stats*/
+	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+
+		u64_stats_init(&priv->tx[i].statss);
+		priv->tx[i].ntfy_id = ntfy_idx;
+		gve_add_napi(priv, ntfy_idx, napi_poll);
+	}
+}
+
 static void add_napi_init_sync_stats(struct gve_priv *priv,
 				     int (*napi_poll)(struct napi_struct *napi,
 						      int budget))
@@ -675,7 +794,7 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
 	int i;
 
 	/* Add tx napi & init sync stats*/
-	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+	for (i = 0; i < gve_num_tx_queues(priv); i++) {
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 
 		u64_stats_init(&priv->tx[i].statss);
@@ -692,34 +811,51 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
 	}
 }
 
-static void gve_tx_free_rings(struct gve_priv *priv)
+static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
 {
 	if (gve_is_gqi(priv)) {
-		gve_tx_free_rings_gqi(priv);
+		gve_tx_free_rings_gqi(priv, start_id, num_rings);
 	} else {
 		gve_tx_free_rings_dqo(priv);
 	}
 }
 
+static int gve_alloc_xdp_rings(struct gve_priv *priv)
+{
+	int start_id;
+	int err = 0;
+
+	if (!priv->num_xdp_queues)
+		return 0;
+
+	start_id = gve_xdp_tx_start_queue_id(priv);
+	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
+	if (err)
+		return err;
+	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
+
+	return 0;
+}
+
 static int gve_alloc_rings(struct gve_priv *priv)
 {
 	int err;
 
 	/* Setup tx rings */
-	priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx),
+	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
 			    GFP_KERNEL);
 	if (!priv->tx)
 		return -ENOMEM;
 
 	if (gve_is_gqi(priv))
-		err = gve_tx_alloc_rings(priv);
+		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
 	else
 		err = gve_tx_alloc_rings_dqo(priv);
 	if (err)
 		goto free_tx;
 
 	/* Setup rx rings */
-	priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx),
+	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
 			    GFP_KERNEL);
 	if (!priv->rx) {
 		err = -ENOMEM;
@@ -744,18 +880,39 @@ free_rx:
 	kvfree(priv->rx);
 	priv->rx = NULL;
 free_tx_queue:
-	gve_tx_free_rings(priv);
+	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
 free_tx:
 	kvfree(priv->tx);
 	priv->tx = NULL;
 	return err;
 }
 
-static int gve_destroy_rings(struct gve_priv *priv)
+static int gve_destroy_xdp_rings(struct gve_priv *priv)
 {
+	int start_id;
 	int err;
 
-	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
+	start_id = gve_xdp_tx_start_queue_id(priv);
+	err = gve_adminq_destroy_tx_queues(priv,
+					   start_id,
+					   priv->num_xdp_queues);
+	if (err) {
+		netif_err(priv, drv, priv->dev,
+			  "failed to destroy XDP queues\n");
+		/* This failure will trigger a reset - no need to clean up */
+		return err;
+	}
+	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
+
+	return 0;
+}
+
+static int gve_destroy_rings(struct gve_priv *priv)
+{
+	int num_tx_queues = gve_num_tx_queues(priv);
+	int err;
+
+	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "failed to destroy tx queues\n");
@@ -782,17 +939,33 @@ static void gve_rx_free_rings(struct gve_priv *priv)
 		gve_rx_free_rings_dqo(priv);
 }
 
+static void gve_free_xdp_rings(struct gve_priv *priv)
+{
+	int ntfy_idx, start_id;
+	int i;
+
+	start_id = gve_xdp_tx_start_queue_id(priv);
+	if (priv->tx) {
+		for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+			gve_remove_napi(priv, ntfy_idx);
+		}
+		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
+	}
+}
+
 static void gve_free_rings(struct gve_priv *priv)
 {
+	int num_tx_queues = gve_num_tx_queues(priv);
 	int ntfy_idx;
 	int i;
 
 	if (priv->tx) {
-		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		for (i = 0; i < num_tx_queues; i++) {
 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 			gve_remove_napi(priv, ntfy_idx);
 		}
-		gve_tx_free_rings(priv);
+		gve_tx_free_rings(priv, 0, num_tx_queues);
 		kvfree(priv->tx);
 		priv->tx = NULL;
 	}
@@ -889,40 +1062,68 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
 
 	kvfree(qpl->page_buses);
+	qpl->page_buses = NULL;
free_pages:
 	kvfree(qpl->pages);
+	qpl->pages = NULL;
 	priv->num_registered_pages -= qpl->num_entries;
 }
 
-static int gve_alloc_qpls(struct gve_priv *priv)
+static int gve_alloc_xdp_qpls(struct gve_priv *priv)
 {
-	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int start_id;
 	int i, j;
 	int err;
 
-	if (num_qpls == 0)
-		return 0;
-
-	priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL);
-	if (!priv->qpls)
-		return -ENOMEM;
-
-	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
 		err = gve_alloc_queue_page_list(priv, i,
 						priv->tx_pages_per_qpl);
 		if (err)
 			goto free_qpls;
 	}
-	for (; i < num_qpls; i++) {
+
+	return 0;
+
+free_qpls:
+	for (j = start_id; j <= i; j++)
+		gve_free_queue_page_list(priv, j);
+	return err;
+}
+
+static int gve_alloc_qpls(struct gve_priv *priv)
+{
+	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+	int start_id;
+	int i, j;
+	int err;
+
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+		return 0;
+
+	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
+	if (!priv->qpls)
+		return -ENOMEM;
+
+	start_id = gve_tx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
+		err = gve_alloc_queue_page_list(priv, i,
+						priv->tx_pages_per_qpl);
+		if (err)
+			goto free_qpls;
+	}
+
+	start_id = gve_rx_start_qpl_id(priv);
+	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
 		err = gve_alloc_queue_page_list(priv, i,
 						priv->rx_data_slot_cnt);
 		if (err)
 			goto free_qpls;
 	}
 
-	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
 				     sizeof(unsigned long) * BITS_PER_BYTE;
-	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls),
+	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
 					    sizeof(unsigned long), GFP_KERNEL);
 	if (!priv->qpl_cfg.qpl_id_map) {
 		err = -ENOMEM;
@@ -935,23 +1136,36 @@ free_qpls:
 	for (j = 0; j <= i; j++)
 		gve_free_queue_page_list(priv, j);
 	kvfree(priv->qpls);
+	priv->qpls = NULL;
 	return err;
 }
 
+static void gve_free_xdp_qpls(struct gve_priv *priv)
+{
+	int start_id;
+	int i;
+
+	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
+		gve_free_queue_page_list(priv, i);
+}
+
 static void gve_free_qpls(struct gve_priv *priv)
 {
-	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
 	int i;
 
-	if (num_qpls == 0)
+	if (!priv->qpls)
 		return;
 
 	kvfree(priv->qpl_cfg.qpl_id_map);
+	priv->qpl_cfg.qpl_id_map = NULL;
 
-	for (i = 0; i < num_qpls; i++)
+	for (i = 0; i < max_queues; i++)
 		gve_free_queue_page_list(priv, i);
 
 	kvfree(priv->qpls);
+	priv->qpls = NULL;
 }
 
 /* Use this to schedule a reset when the device is capable of continuing
@@ -969,11 +1183,109 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
 static void gve_turndown(struct gve_priv *priv);
 static void gve_turnup(struct gve_priv *priv);
 
+static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
+{
+	struct napi_struct *napi;
+	struct gve_rx_ring *rx;
+	int err = 0;
+	int i, j;
+	u32 tx_qid;
+
+	if (!priv->num_xdp_queues)
+		return 0;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		rx = &priv->rx[i];
+		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+
+		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
+				       napi->napi_id);
+		if (err)
+			goto err;
+		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err)
+			goto err;
+		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
+		if (rx->xsk_pool) {
+			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
+					       napi->napi_id);
+			if (err)
+				goto err;
+			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+							 MEM_TYPE_XSK_BUFF_POOL, NULL);
+			if (err)
+				goto err;
+			xsk_pool_set_rxq_info(rx->xsk_pool,
+					      &rx->xsk_rxq);
+		}
+	}
+
+	for (i = 0; i < priv->num_xdp_queues; i++) {
+		tx_qid = gve_xdp_tx_queue_id(priv, i);
+		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
+	}
+	return 0;
+
+err:
+	for (j = i; j >= 0; j--) {
+		rx = &priv->rx[j];
+		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+			xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+			xdp_rxq_info_unreg(&rx->xsk_rxq);
+	}
+	return err;
+}
+
+static void gve_unreg_xdp_info(struct gve_priv *priv)
+{
+	int i, tx_qid;
+
+	if (!priv->num_xdp_queues)
+		return;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		struct gve_rx_ring *rx = &priv->rx[i];
+
+		xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (rx->xsk_pool) {
+			xdp_rxq_info_unreg(&rx->xsk_rxq);
+			rx->xsk_pool = NULL;
+		}
+	}
+
+	for (i = 0; i < priv->num_xdp_queues; i++) {
+		tx_qid = gve_xdp_tx_queue_id(priv, i);
+		priv->tx[tx_qid].xsk_pool = NULL;
+	}
+}
+
+static void gve_drain_page_cache(struct gve_priv *priv)
+{
+	struct page_frag_cache *nc;
+	int i;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		nc = &priv->rx[i].page_cache;
+		if (nc->va) {
+			__page_frag_cache_drain(virt_to_page(nc->va),
+						nc->pagecnt_bias);
+			nc->va = NULL;
+		}
+	}
+}
+
 static int gve_open(struct net_device *dev)
 {
 	struct gve_priv *priv = netdev_priv(dev);
 	int err;
 
+	if (priv->xdp_prog)
+		priv->num_xdp_queues = priv->rx_cfg.num_queues;
+	else
+		priv->num_xdp_queues = 0;
+
 	err = gve_alloc_qpls(priv);
 	if (err)
 		return err;
@@ -989,6 +1301,10 @@ static int gve_open(struct net_device *dev)
 	if (err)
 		goto free_rings;
 
+	err = gve_reg_xdp_info(priv, dev);
+	if (err)
+		goto free_rings;
+
 	err = gve_register_qpls(priv);
 	if (err)
 		goto reset;
@@ -1043,6 +1359,7 @@ static int gve_close(struct net_device *dev)
 	netif_carrier_off(dev);
 	if (gve_get_device_rings_ok(priv)) {
 		gve_turndown(priv);
+		gve_drain_page_cache(priv);
 		err = gve_destroy_rings(priv);
 		if (err)
 			goto err;
@@ -1053,6 +1370,7 @@ static int gve_close(struct net_device *dev)
 	}
 	del_timer_sync(&priv->stats_report_timer);
 
+	gve_unreg_xdp_info(priv);
 	gve_free_rings(priv);
 	gve_free_qpls(priv);
 	priv->interface_down_cnt++;
@@ -1069,6 +1387,306 @@ err:
 	return gve_reset_recovery(priv, false);
 }
 
+static int gve_remove_xdp_queues(struct gve_priv *priv)
+{
+	int err;
+
+	err = gve_destroy_xdp_rings(priv);
+	if (err)
+		return err;
+
+	err = gve_unregister_xdp_qpls(priv);
+	if (err)
+		return err;
+
+	gve_unreg_xdp_info(priv);
+	gve_free_xdp_rings(priv);
+	gve_free_xdp_qpls(priv);
+	priv->num_xdp_queues = 0;
+	return 0;
+}
+
+static int gve_add_xdp_queues(struct gve_priv *priv)
+{
+	int err;
+
+	priv->num_xdp_queues = priv->tx_cfg.num_queues;
+
+	err = gve_alloc_xdp_qpls(priv);
+	if (err)
+		goto err;
+
+	err = gve_alloc_xdp_rings(priv);
+	if (err)
+		goto free_xdp_qpls;
+
+	err = gve_reg_xdp_info(priv, priv->dev);
+	if (err)
+		goto free_xdp_rings;
+
+	err = gve_register_xdp_qpls(priv);
+	if (err)
+		goto free_xdp_rings;
+
+	err = gve_create_xdp_rings(priv);
+	if (err)
+		goto free_xdp_rings;
+
+	return 0;
+
+free_xdp_rings:
+	gve_free_xdp_rings(priv);
+free_xdp_qpls:
+	gve_free_xdp_qpls(priv);
+err:
+	priv->num_xdp_queues = 0;
+	return err;
+}
+
+static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+	if (!gve_get_napi_enabled(priv))
+		return;
+
+	if (link_status == netif_carrier_ok(priv->dev))
+		return;
+
+	if (link_status) {
+		netdev_info(priv->dev, "Device link is up.\n");
+		netif_carrier_on(priv->dev);
+	} else {
+		netdev_info(priv->dev, "Device link is down.\n");
+		netif_carrier_off(priv->dev);
+	}
+}
+
+static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
+		       struct netlink_ext_ack *extack)
+{
+	struct bpf_prog *old_prog;
+	int err = 0;
+	u32 status;
+
+	old_prog = READ_ONCE(priv->xdp_prog);
+	if (!netif_carrier_ok(priv->dev)) {
+		WRITE_ONCE(priv->xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+		return 0;
+	}
+
+	gve_turndown(priv);
+	if (!old_prog && prog) {
+		// Allocate XDP TX queues if an XDP program is
+		// being installed
+		err = gve_add_xdp_queues(priv);
+		if (err)
+			goto out;
+	} else if (old_prog && !prog) {
+		// Remove XDP TX queues if an XDP program is
+		// being uninstalled
+		err = gve_remove_xdp_queues(priv);
+		if (err)
+			goto out;
+	}
+	WRITE_ONCE(priv->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+out:
+	gve_turnup(priv);
+	status = ioread32be(&priv->reg_bar0->device_status);
+	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+	return err;
+}
+
+static int gve_xsk_pool_enable(struct net_device *dev,
+			       struct xsk_buff_pool *pool,
+			       u16 qid)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct napi_struct *napi;
+	struct gve_rx_ring *rx;
+	int tx_qid;
+	int err;
+
+	if (qid >= priv->rx_cfg.num_queues) {
+		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
+		return -EINVAL;
+	}
+	if (xsk_pool_get_rx_frame_size(pool) <
+	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
+		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
+		return -EINVAL;
+	}
+
+	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
+			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	if (err)
+		return err;
+
+	/* If XDP prog is not installed, return */
+	if (!priv->xdp_prog)
+		return 0;
+
+	rx = &priv->rx[qid];
+	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
+	if (err)
+		goto err;
+
+	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+					 MEM_TYPE_XSK_BUFF_POOL, NULL);
+	if (err)
+		goto err;
+
+	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
+	rx->xsk_pool = pool;
+
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	priv->tx[tx_qid].xsk_pool = pool;
+
+	return 0;
+err:
+	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+		xdp_rxq_info_unreg(&rx->xsk_rxq);
+
+	xsk_pool_dma_unmap(pool,
+			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return err;
+}
+
+static int gve_xsk_pool_disable(struct net_device *dev,
+				u16 qid)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct napi_struct *napi_rx;
+	struct napi_struct *napi_tx;
+	struct xsk_buff_pool *pool;
+	int tx_qid;
+
+	pool = xsk_get_pool_from_qid(dev, qid);
+	if (!pool)
+		return -EINVAL;
+	if (qid >= priv->rx_cfg.num_queues)
+		return -EINVAL;
+
+	/* If XDP prog is not installed, unmap DMA and return */
+	if (!priv->xdp_prog)
+		goto done;
+
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	if (!netif_running(dev)) {
+		priv->rx[qid].xsk_pool = NULL;
+		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+		priv->tx[tx_qid].xsk_pool = NULL;
+		goto done;
+	}
+
+	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
+	napi_disable(napi_rx); /* make sure current rx poll is done */
+
+	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
+	napi_disable(napi_tx); /* make sure current tx poll is done */
+
+	priv->rx[qid].xsk_pool = NULL;
+	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+	priv->tx[tx_qid].xsk_pool = NULL;
+	smp_mb(); /* Make sure it is visible to the workers on datapath */
+
+	napi_enable(napi_rx);
+	if (gve_rx_work_pending(&priv->rx[qid]))
+		napi_schedule(napi_rx);
+
+	napi_enable(napi_tx);
+	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+		napi_schedule(napi_tx);
+
+done:
+	xsk_pool_dma_unmap(pool,
+			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return 0;
+}
+
+static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+
+	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
+		return -EINVAL;
+
+	if (flags & XDP_WAKEUP_TX) {
+		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
+		struct napi_struct *napi =
+			&priv->ntfy_blocks[tx->ntfy_id].napi;
+
+		if (!napi_if_scheduled_mark_missed(napi)) {
+			/* Call local_bh_enable to trigger SoftIRQ processing */
+			local_bh_disable();
+			napi_schedule(napi);
+			local_bh_enable();
+		}
+
+		tx->xdp_xsk_wakeup++;
+	}
+
+	return 0;
+}
+
+static int verify_xdp_configuration(struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	if (dev->features & NETIF_F_LRO) {
+		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
+		netdev_warn(dev, "XDP is not supported in mode %d.\n",
+			    priv->queue_format);
+		return -EOPNOTSUPP;
+	}
+
+	if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
+		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
+			    dev->mtu);
+		return -EOPNOTSUPP;
+	}
+
+	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
+	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
+		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
+			    priv->rx_cfg.num_queues,
+			    priv->tx_cfg.num_queues,
+			    priv->tx_cfg.max_queues);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = verify_xdp_configuration(dev);
+	if (err)
+		return err;
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return gve_set_xdp(priv, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		if (xdp->xsk.pool)
+			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
+		else
+			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
 int gve_adjust_queues(struct gve_priv *priv,
 		      struct gve_queue_config new_rx_config,
 		      struct gve_queue_config new_tx_config)
@@ -1118,7 +1736,7 @@ static void gve_turndown(struct gve_priv *priv)
 		return;
 
 	/* Disable napi to prevent more work from coming in */
-	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
@@ -1146,7 +1764,7 @@ static void gve_turnup(struct gve_priv *priv)
 	netif_tx_start_all_queues(priv->dev);
 
 	/* Enable napi and unmask interrupts for all queues */
-	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
@@ -1263,6 +1881,9 @@ static const struct net_device_ops gve_netdev_ops = {
 	.ndo_get_stats64	=	gve_get_stats,
 	.ndo_tx_timeout		=	gve_tx_timeout,
 	.ndo_set_features	=	gve_set_features,
+	.ndo_bpf		=	gve_xdp,
+	.ndo_xdp_xmit		=	gve_xdp_xmit,
+	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
 };
 
 static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1306,7 +1927,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
 	be64_add_cpu(&priv->stats_report->written_count, 1);
 	/* tx stats */
 	if (priv->tx) {
-		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
 			u32 last_completion = 0;
 			u32 tx_frames = 0;
 
@@ -1369,23 +1990,6 @@ void gve_handle_report_stats(struct gve_priv *priv)
 	}
 }
 
-static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
-{
-	if (!gve_get_napi_enabled(priv))
-		return;
-
-	if (link_status == netif_carrier_ok(priv->dev))
-		return;
-
-	if (link_status) {
-		netdev_info(priv->dev, "Device link is up.\n");
-		netif_carrier_on(priv->dev);
-	} else {
-		netdev_info(priv->dev, "Device link is down.\n");
-		netif_carrier_off(priv->dev);
-	}
-}
-
 /* Handle NIC status register changes, reset requests and report stats */
 static void gve_service_task(struct work_struct *work)
 {
@@ -1399,6 +2003,18 @@ static void gve_service_task(struct work_struct *work)
 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
 }
 
+static void gve_set_netdev_xdp_features(struct gve_priv *priv)
+{
+	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
+		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
+		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+	} else {
+		priv->dev->xdp_features = 0;
+	}
+}
+
 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 {
 	int num_ntfy;
@@ -1477,6 +2093,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 	}
 
 setup_device:
+	gve_set_netdev_xdp_features(priv);
 	err = gve_setup_device_resources(priv);
 	if (!err)
 		return 0;
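For context, the ndo_bpf and ndo_xsk_wakeup hooks registered above are what a standard XDP loader ends up driving. A minimal userspace sketch using libbpf (assumes libbpf >= 0.8 and a prebuilt XDP object file; the file name "xdp_pass.o" and interface "eth0" are illustrative, not from the patch):

	/* Sketch: attach an XDP program in native/driver mode, which
	 * exercises the gve_set_xdp() path above (XDP TX queues are
	 * brought up before the program goes live).
	 */
	#include <bpf/libbpf.h>
	#include <linux/if_link.h>
	#include <net/if.h>
	#include <stdio.h>

	int main(void)
	{
		int ifindex = if_nametoindex("eth0");	/* gve-backed NIC */
		struct bpf_object *obj;
		struct bpf_program *prog;

		obj = bpf_object__open_file("xdp_pass.o", NULL);
		if (!obj || bpf_object__load(obj))
			return 1;
		prog = bpf_object__next_program(obj, NULL);
		if (bpf_xdp_attach(ifindex, bpf_program__fd(prog),
				   XDP_FLAGS_DRV_MODE, NULL)) {
			/* verify_xdp_configuration() rejects LRO,
			 * RX != TX, or 2*TX > max queues
			 */
			fprintf(stderr, "attach failed\n");
			return 1;
		}
		return 0;
	}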
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 1f55137722b0..d1da7413dc4d 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -8,6 +8,9 @@
 #include "gve_adminq.h"
 #include "gve_utils.h"
 #include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
 
 static void gve_rx_free_buffer(struct device *dev,
 			       struct gve_rx_slot_page_info *page_info,
@@ -124,7 +127,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
 		return -ENOMEM;
 
 	if (!rx->data.raw_addressing) {
-		rx->data.qpl = gve_assign_rx_qpl(priv);
+		rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num);
 		if (!rx->data.qpl) {
 			kvfree(rx->data.page_info);
 			rx->data.page_info = NULL;
@@ -556,7 +559,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
 
 	if (len <= priv->rx_copybreak && is_only_frag) {
 		/* Just copy small packets */
-		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
+		skb = gve_rx_copy(netdev, napi, page_info, len);
 		if (skb) {
 			u64_stats_update_begin(&rx->statss);
 			rx->rx_copied_pkt++;
@@ -591,6 +594,107 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
 	return skb;
 }
 
+static int gve_xsk_pool_redirect(struct net_device *dev,
+				 struct gve_rx_ring *rx,
+				 void *data, int len,
+				 struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff *xdp;
+	int err;
+
+	if (rx->xsk_pool->frame_len < len)
+		return -E2BIG;
+	xdp = xsk_buff_alloc(rx->xsk_pool);
+	if (!xdp) {
+		u64_stats_update_begin(&rx->statss);
+		rx->xdp_alloc_fails++;
+		u64_stats_update_end(&rx->statss);
+		return -ENOMEM;
+	}
+	xdp->data_end = xdp->data + len;
+	memcpy(xdp->data, data, len);
+	err = xdp_do_redirect(dev, xdp, xdp_prog);
+	if (err)
+		xsk_buff_free(xdp);
+	return err;
+}
+
+static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx,
+			    struct xdp_buff *orig, struct bpf_prog *xdp_prog)
+{
+	int total_len, len = orig->data_end - orig->data;
+	int headroom = XDP_PACKET_HEADROOM;
+	struct xdp_buff new;
+	void *frame;
+	int err;
+
+	if (rx->xsk_pool)
+		return gve_xsk_pool_redirect(dev, rx, orig->data,
+					     len, xdp_prog);
+
+	total_len = headroom + SKB_DATA_ALIGN(len) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	frame = page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC);
+	if (!frame) {
+		u64_stats_update_begin(&rx->statss);
+		rx->xdp_alloc_fails++;
+		u64_stats_update_end(&rx->statss);
+		return -ENOMEM;
+	}
+	xdp_init_buff(&new, total_len, &rx->xdp_rxq);
+	xdp_prepare_buff(&new, frame, headroom, len, false);
+	memcpy(new.data, orig->data, len);
+
+	err = xdp_do_redirect(dev, &new, xdp_prog);
+	if (err)
+		page_frag_free(frame);
+
+	return err;
+}
+
+static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx,
+			 struct xdp_buff *xdp, struct bpf_prog *xprog,
+			 int xdp_act)
+{
+	struct gve_tx_ring *tx;
+	int tx_qid;
+	int err;
+
+	switch (xdp_act) {
+	case XDP_ABORTED:
+	case XDP_DROP:
+	default:
+		break;
+	case XDP_TX:
+		tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+		tx = &priv->tx[tx_qid];
+		spin_lock(&tx->xdp_lock);
+		err = gve_xdp_xmit_one(priv, tx, xdp->data,
+				       xdp->data_end - xdp->data, NULL);
+		spin_unlock(&tx->xdp_lock);
+
+		if (unlikely(err)) {
+			u64_stats_update_begin(&rx->statss);
+			rx->xdp_tx_errors++;
+			u64_stats_update_end(&rx->statss);
+		}
+		break;
+	case XDP_REDIRECT:
+		err = gve_xdp_redirect(priv->dev, rx, xdp, xprog);
+
+		if (unlikely(err)) {
+			u64_stats_update_begin(&rx->statss);
+			rx->xdp_redirect_errors++;
+			u64_stats_update_end(&rx->statss);
+		}
+		break;
+	}
+	u64_stats_update_begin(&rx->statss);
+	if ((u32)xdp_act < GVE_XDP_ACTIONS)
+		rx->xdp_actions[xdp_act]++;
+	u64_stats_update_end(&rx->statss);
+}
+
 #define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
 static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
 		   struct gve_rx_desc *desc, u32 idx,
@@ -603,9 +707,12 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
 	union gve_rx_data_slot *data_slot;
 	struct gve_priv *priv = rx->gve;
 	struct sk_buff *skb = NULL;
+	struct bpf_prog *xprog;
+	struct xdp_buff xdp;
 	dma_addr_t page_bus;
 	void *va;
 
+	u16 len = frag_size;
 	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
 	bool is_first_frag = ctx->frag_cnt == 0;
 
@@ -645,9 +752,35 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
 	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
 				PAGE_SIZE, DMA_FROM_DEVICE);
 	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
+	len -= page_info->pad;
 	frag_size -= page_info->pad;
 
-	skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
+	xprog = READ_ONCE(priv->xdp_prog);
+	if (xprog && is_only_frag) {
+		void *old_data;
+		int xdp_act;
+
+		xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+		xdp_prepare_buff(&xdp, page_info->page_address +
+				 page_info->page_offset, GVE_RX_PAD,
+				 len, false);
+		old_data = xdp.data;
+		xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+		if (xdp_act != XDP_PASS) {
+			gve_xdp_done(priv, rx, &xdp, xprog, xdp_act);
+			ctx->total_size += frag_size;
+			goto finish_ok_pkt;
+		}
+
+		page_info->pad += xdp.data - old_data;
+		len = xdp.data_end - xdp.data;
+
+		u64_stats_update_begin(&rx->statss);
+		rx->xdp_actions[XDP_PASS]++;
+		u64_stats_update_end(&rx->statss);
+	}
+
+	skb = gve_rx_skb(priv, rx, page_info, napi, len,
 			 data_slot, is_only_frag);
 	if (!skb) {
 		u64_stats_update_begin(&rx->statss);
@@ -773,6 +906,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 			     netdev_features_t feat)
 {
+	u64 xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+	u64 xdp_txs = rx->xdp_actions[XDP_TX];
 	struct gve_rx_ctx *ctx = &rx->ctx;
 	struct gve_priv *priv = rx->gve;
 	struct gve_rx_cnts cnts = {0};
@@ -820,6 +955,12 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 		u64_stats_update_end(&rx->statss);
 	}
 
+	if (xdp_txs != rx->xdp_actions[XDP_TX])
+		gve_xdp_tx_flush(priv, rx->q_num);
+
+	if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+		xdp_do_flush();
+
 	/* restock ring slots */
 	if (!rx->data.raw_addressing) {
 		/* In QPL mode buffs are refilled as the desc are processed */
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 630f42a3037b..e57b73eb70f6 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -568,7 +568,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 
 	if (eop && buf_len <= priv->rx_copybreak) {
 		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
-					       &buf_state->page_info, buf_len, 0);
+					       &buf_state->page_info, buf_len);
 		if (unlikely(!rx->ctx.skb_head))
 			goto error;
 		rx->ctx.skb_tail = rx->ctx.skb_head;
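A side note on gve_xdp_redirect() above: because GQI-QPL RX buffers belong to the device-registered page list, a redirected frame is first copied into a page-frag that is laid out like an skb backing buffer. Rough sizing math, redone in userspace (the cacheline and skb_shared_info sizes are assumed x86-64 values, not taken from the patch):

	/* Sketch: how the copy-based XDP_REDIRECT path sizes its
	 * page_frag_alloc() request (illustrative values only).
	 */
	#include <stdio.h>

	#define CACHELINE 64	/* assumed SMP_CACHE_BYTES */
	#define ALIGN_UP(x) (((x) + CACHELINE - 1) & ~(CACHELINE - 1))

	int main(void)
	{
		int headroom = 256;	/* XDP_PACKET_HEADROOM */
		int len = 1500;		/* frame length */
		int shinfo = 320;	/* approx sizeof(struct skb_shared_info) */
		int total = headroom + ALIGN_UP(len) + ALIGN_UP(shinfo);

		/* ~2112 bytes served from rx->page_cache */
		printf("page_frag_alloc size: %d\n", total);
		return 0;
	}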
gve_tx_put_doorbell(priv, tx->q_resources, tx->req); +} + /* gvnic can only transmit from a Registered Segment. * We copy skb payloads into the registered segment before writing Tx * descriptors and ringing the Tx doorbell. @@ -132,6 +141,58 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes) atomic_add(bytes, &fifo->available); } +static size_t gve_tx_clear_buffer_state(struct gve_tx_buffer_state *info) +{ + size_t space_freed = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(info->iov); i++) { + space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; + info->iov[i].iov_len = 0; + info->iov[i].iov_padding = 0; + } + return space_freed; +} + +static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do) +{ + struct gve_tx_buffer_state *info; + u32 clean_end = tx->done + to_do; + u64 pkts = 0, bytes = 0; + size_t space_freed = 0; + u32 xsk_complete = 0; + u32 idx; + + for (; tx->done < clean_end; tx->done++) { + idx = tx->done & tx->mask; + info = &tx->info[idx]; + + if (unlikely(!info->xdp.size)) + continue; + + bytes += info->xdp.size; + pkts++; + xsk_complete += info->xdp.is_xsk; + + info->xdp.size = 0; + if (info->xdp_frame) { + xdp_return_frame(info->xdp_frame); + info->xdp_frame = NULL; + } + space_freed += gve_tx_clear_buffer_state(info); + } + + gve_tx_free_fifo(&tx->tx_fifo, space_freed); + if (xsk_complete > 0 && tx->xsk_pool) + xsk_tx_completed(tx->xsk_pool, xsk_complete); + u64_stats_update_begin(&tx->statss); + tx->bytes_done += bytes; + tx->pkt_done += pkts; + u64_stats_update_end(&tx->statss); + return pkts; +} + static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do, bool try_to_wake); @@ -144,8 +205,12 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); - netdev_tx_reset_queue(tx->netdev_txq); + if (tx->q_num < priv->tx_cfg.num_queues) { + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + netdev_tx_reset_queue(tx->netdev_txq); + } else { + gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); + } dma_free_coherent(hdev, sizeof(*tx->q_resources), tx->q_resources, tx->q_resources_bus); @@ -177,6 +242,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) /* Make sure everything is zeroed to start */ memset(tx, 0, sizeof(*tx)); spin_lock_init(&tx->clean_lock); + spin_lock_init(&tx->xdp_lock); tx->q_num = idx; tx->mask = slots - 1; @@ -195,7 +261,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT; tx->dev = &priv->pdev->dev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); + tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx); if (!tx->tx_fifo.qpl) goto abort_with_desc; /* map Tx FIFO */ @@ -213,7 +279,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, (unsigned long)tx->bus); - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + if (idx < priv->tx_cfg.num_queues) + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); gve_tx_add_to_block(priv, idx); return 0; @@ -233,12 +300,12 @@ abort_with_info: return -ENOMEM; } -int gve_tx_alloc_rings(struct gve_priv *priv) +int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings) { int err = 0; int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + for (i = start_id; i < start_id + num_rings; i++) { err = gve_tx_alloc_ring(priv, 
i); if (err) { netif_err(priv, drv, priv->dev, @@ -251,17 +318,17 @@ int gve_tx_alloc_rings(struct gve_priv *priv) if (err) { int j; - for (j = 0; j < i; j++) + for (j = start_id; j < i; j++) gve_tx_free_ring(priv, j); } return err; } -void gve_tx_free_rings_gqi(struct gve_priv *priv) +void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings) { int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) + for (i = start_id; i < start_id + num_rings; i++) gve_tx_free_ring(priv, i); } @@ -374,18 +441,18 @@ static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx, } static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, - struct sk_buff *skb, bool is_gso, + u16 csum_offset, u8 ip_summed, bool is_gso, int l4_hdr_offset, u32 desc_cnt, - u16 hlen, u64 addr) + u16 hlen, u64 addr, u16 pkt_len) { /* l4_hdr_offset and csum_offset are in units of 16-bit words */ if (is_gso) { pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM; - pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_csum_offset = csum_offset >> 1; pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; - } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + } else if (likely(ip_summed == CHECKSUM_PARTIAL)) { pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM; - pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_csum_offset = csum_offset >> 1; pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; } else { pkt_desc->pkt.type_flags = GVE_TXD_STD; @@ -393,7 +460,7 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, pkt_desc->pkt.l4_hdr_offset = 0; } pkt_desc->pkt.desc_cnt = desc_cnt; - pkt_desc->pkt.len = cpu_to_be16(skb->len); + pkt_desc->pkt.len = cpu_to_be16(pkt_len); pkt_desc->pkt.seg_len = cpu_to_be16(hlen); pkt_desc->pkt.seg_addr = cpu_to_be64(addr); } @@ -412,15 +479,16 @@ static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc, } static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, - struct sk_buff *skb, bool is_gso, + u16 l3_offset, u16 gso_size, + bool is_gso_v6, bool is_gso, u16 len, u64 addr) { seg_desc->seg.type_flags = GVE_TXD_SEG; if (is_gso) { - if (skb_is_gso_v6(skb)) + if (is_gso_v6) seg_desc->seg.type_flags |= GVE_TXSF_IPV6; - seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1; - seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + seg_desc->seg.l3_offset = l3_offset >> 1; + seg_desc->seg.mss = cpu_to_be16(gso_size); } seg_desc->seg.seg_len = cpu_to_be16(len); seg_desc->seg.seg_addr = cpu_to_be64(addr); @@ -473,9 +541,10 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen, &info->iov[payload_iov]); - gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed, + is_gso, l4_hdr_offset, 1 + mtd_desc_nr + payload_nfrags, hlen, - info->iov[hdr_nfrags - 1].iov_offset); + info->iov[hdr_nfrags - 1].iov_offset, skb->len); skb_copy_bits(skb, 0, tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, @@ -494,7 +563,9 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask; seg_desc = &tx->desc[next_idx]; - gve_tx_fill_seg_desc(seg_desc, skb, is_gso, + gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb), + skb_shinfo(skb)->gso_size, + skb_is_gso_v6(skb), is_gso, info->iov[i].iov_len, info->iov[i].iov_offset); @@ -552,8 +623,9 @@ static int 
@@ -552,8 +623,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 	if (mtd_desc_nr)
 		num_descriptors++;
 
-	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
-			     num_descriptors, hlen, addr);
+	gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed,
+			     is_gso, l4_hdr_offset,
+			     num_descriptors, hlen, addr, skb->len);
 
 	if (mtd_desc_nr) {
 		idx = (idx + 1) & tx->mask;
@@ -569,7 +641,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 		addr += hlen;
 		idx = (idx + 1) & tx->mask;
 		seg_desc = &tx->desc[idx];
-		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+		gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+				     skb_shinfo(skb)->gso_size,
+				     skb_is_gso_v6(skb), is_gso, len, addr);
 	}
 
 	for (i = 0; i < shinfo->nr_frags; i++) {
@@ -587,7 +661,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
 		dma_unmap_len_set(&tx->info[idx], len, len);
 		dma_unmap_addr_set(&tx->info[idx], dma, addr);
-		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+		gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+				     skb_shinfo(skb)->gso_size,
+				     skb_is_gso_v6(skb), is_gso, len, addr);
 	}
 
 	return num_descriptors;
@@ -648,6 +724,103 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
+			   void *data, int len, void *frame_p, bool is_xsk)
+{
+	int pad, nfrags, ndescs, iovi, offset;
+	struct gve_tx_buffer_state *info;
+	u32 reqi = tx->req;
+
+	pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len);
+	if (pad >= GVE_TX_MAX_HEADER_SIZE)
+		pad = 0;
+	info = &tx->info[reqi & tx->mask];
+	info->xdp_frame = frame_p;
+	info->xdp.size = len;
+	info->xdp.is_xsk = is_xsk;
+
+	nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len,
+				   &info->iov[0]);
+	iovi = pad > 0;
+	ndescs = nfrags - iovi;
+	offset = 0;
+
+	while (iovi < nfrags) {
+		if (!offset)
+			gve_tx_fill_pkt_desc(&tx->desc[reqi & tx->mask], 0,
+					     CHECKSUM_NONE, false, 0, ndescs,
+					     info->iov[iovi].iov_len,
+					     info->iov[iovi].iov_offset, len);
+		else
+			gve_tx_fill_seg_desc(&tx->desc[reqi & tx->mask],
+					     0, 0, false, false,
+					     info->iov[iovi].iov_len,
+					     info->iov[iovi].iov_offset);
+
+		memcpy(tx->tx_fifo.base + info->iov[iovi].iov_offset,
+		       data + offset, info->iov[iovi].iov_len);
+		gve_dma_sync_for_device(&priv->pdev->dev,
+					tx->tx_fifo.qpl->page_buses,
+					info->iov[iovi].iov_offset,
+					info->iov[iovi].iov_len);
+		offset += info->iov[iovi].iov_len;
+		iovi++;
+		reqi++;
+	}
+
+	return ndescs;
+}
+
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		 u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_tx_ring *tx;
+	int i, err = 0, qid;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	qid = gve_xdp_tx_queue_id(priv,
+				  smp_processor_id() % priv->num_xdp_queues);
+
+	tx = &priv->tx[qid];
+
+	spin_lock(&tx->xdp_lock);
+	for (i = 0; i < n; i++) {
+		err = gve_xdp_xmit_one(priv, tx, frames[i]->data,
+				       frames[i]->len, frames[i]);
+		if (err)
+			break;
+	}
+
+	if (flags & XDP_XMIT_FLUSH)
+		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+
+	spin_unlock(&tx->xdp_lock);
+
+	u64_stats_update_begin(&tx->statss);
+	tx->xdp_xmit += n;
+	tx->xdp_xmit_errors += n - i;
+	u64_stats_update_end(&tx->statss);
+
+	return i ? i : err;
+}
+
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+		     void *data, int len, void *frame_p)
+{
+	int nsegs;
+
+	if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1))
+		return -EBUSY;
+
+	nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p, false);
+	tx->req += nsegs;
+
+	return 0;
+}
+
 #define GVE_TX_START_THRESH	PAGE_SIZE
 
 static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
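Note: gve_xdp_xmit() follows the ndo_xdp_xmit contract: it returns the
number of frames accepted, and an errno only when nothing was sent;
the doorbell rings only on XDP_XMIT_FLUSH. tx->xdp_lock is needed
because several CPUs can hash onto the same ring through
smp_processor_id() % num_xdp_queues. The hookup into net_device_ops
happens elsewhere in this series; a sketch of the assumed wiring, with
the other ops elided:

	static const struct net_device_ops gve_netdev_ops = {
		/* ... existing gve ops ... */
		.ndo_xdp_xmit	= gve_xdp_xmit,
	};
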
i : err; +} + +int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, + void *data, int len, void *frame_p) +{ + int nsegs; + + if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1)) + return -EBUSY; + + nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p, false); + tx->req += nsegs; + + return 0; +} + #define GVE_TX_START_THRESH PAGE_SIZE static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, @@ -657,8 +830,8 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u64 pkts = 0, bytes = 0; size_t space_freed = 0; struct sk_buff *skb; - int i, j; u32 idx; + int j; for (j = 0; j < to_do; j++) { idx = tx->done & tx->mask; @@ -680,12 +853,7 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, dev_consume_skb_any(skb); if (tx->raw_addressing) continue; - /* FIFO free */ - for (i = 0; i < ARRAY_SIZE(info->iov); i++) { - space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; - info->iov[i].iov_len = 0; - info->iov[i].iov_padding = 0; - } + space_freed += gve_tx_clear_buffer_state(info); } } @@ -720,6 +888,70 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv, return be32_to_cpu(counter); } +static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx, + int budget) +{ + struct xdp_desc desc; + int sent = 0, nsegs; + void *data; + + spin_lock(&tx->xdp_lock); + while (sent < budget) { + if (!gve_can_tx(tx, GVE_TX_START_THRESH)) + goto out; + + if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) { + tx->xdp_xsk_done = tx->xdp_xsk_wakeup; + goto out; + } + + data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr); + nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true); + tx->req += nsegs; + sent++; + } +out: + if (sent > 0) { + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + xsk_tx_release(tx->xsk_pool); + } + spin_unlock(&tx->xdp_lock); + return sent; +} + +bool gve_xdp_poll(struct gve_notify_block *block, int budget) +{ + struct gve_priv *priv = block->priv; + struct gve_tx_ring *tx = block->tx; + u32 nic_done; + bool repoll; + u32 to_do; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + /* Find out how much work there is to be done */ + nic_done = gve_tx_load_event_counter(priv, tx); + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_xdp_done(priv, tx, to_do); + repoll = nic_done != tx->done; + + if (tx->xsk_pool) { + int sent = gve_xsk_tx(priv, tx, budget); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + repoll |= (sent == budget); + if (xsk_uses_need_wakeup(tx->xsk_pool)) + xsk_set_tx_need_wakeup(tx->xsk_pool); + } + + /* If we still have work we want to repoll */ + return repoll; +} + bool gve_tx_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 6ba46adaaee3..26e08d753270 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -49,10 +49,10 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) } struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len, - u16 padding) + struct gve_rx_slot_page_info *page_info, u16 len) { - void *va = page_info->page_address + padding + page_info->page_offset; + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; struct sk_buff *skb; skb = 
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 6ba46adaaee3..26e08d753270 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -49,10 +49,10 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
 }
 
 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
-			    struct gve_rx_slot_page_info *page_info, u16 len,
-			    u16 padding)
+			    struct gve_rx_slot_page_info *page_info, u16 len)
 {
-	void *va = page_info->page_address + padding + page_info->page_offset;
+	void *va = page_info->page_address + page_info->page_offset +
+		   page_info->pad;
 	struct sk_buff *skb;
 
 	skb = napi_alloc_skb(napi, len);
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 79595940b351..324fd98a6112 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -18,8 +18,7 @@ void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
 void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
 
 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
-			    struct gve_rx_slot_page_info *page_info, u16 len,
-			    u16 pad);
+			    struct gve_rx_slot_page_info *page_info, u16 len);
 
 /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
 void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);
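Note: gve_rx_copy() now reads the pad from page_info rather than
taking it as a parameter, so the RX path can record per-buffer
headroom (for example, space reserved ahead of the packet for XDP)
once, when it fills the slot. For completeness, a minimal XDP program
that would exercise the new TX path once this series is applied (a
sketch, not part of the patch):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_reflect(struct xdp_md *ctx)
	{
		/* Bounce every received frame back out the same port,
		 * driving the driver's XDP_TX queues added above.
		 */
		return XDP_TX;
	}

	char _license[] SEC("license") = "GPL";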