Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
ice: xsk: ZC changes

Maciej Fijalkowski says:

This set consists of two fixes for issues that were either pointed out
indirectly on the mailing list (John was reviewing the AF_XDP selftests
that exercise ice's ZC support) or reported directly by customers.

The first patch lets user space see a done descriptor in the CQ even when
only a single frame has been transmitted, and the second patch removes the
requirement that HW rings be sized to a power-of-2 number of descriptors
when used with AF_XDP.
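
As a rough standalone sketch of why the restriction can go (this is not the
driver code; split_refill() and the numbers below are made up purely for
illustration), plain division splits a refill request correctly for any ring
size, whereas the old mask trick only worked for powers of 2:

  #include <stdio.h>

  /* Split a refill request into full quarter-ring chunks plus a leftover.
   * ring_quarter stands in for ICE_RING_QUARTER(); the old
   * `count & (ring_quarter - 1)` is only equivalent to this leftover when
   * ring_quarter is a power of 2, while division/subtraction always works. */
  static void split_refill(unsigned int count, unsigned int ring_quarter)
  {
          unsigned int tail_bumps = count / ring_quarter;
          unsigned int leftover = count - tail_bumps * ring_quarter;

          printf("count=%u -> %u chunks of %u, leftover %u\n",
                 count, tail_bumps, ring_quarter, leftover);
  }

  int main(void)
  {
          split_refill(4096, 1024); /* power-of-2 ring still fine */
          split_refill(3000, 750);  /* non-power-of-2 ring now allowed */
          return 0;
  }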

I also forgot to mention that, due to the current Tx cleaning algorithm,
a 4k HW ring was broken; these two patches bring it back to life, so we
kill two birds with one stone.
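
For reference, a minimal standalone sketch of the idea behind the reworked
cleaning (an illustrative helper, not the driver's function): check the DD
bit on the descriptor that carried the last RS bit, one slot behind
next_to_use, and complete everything from next_to_clean up to it,
wraparound included:

  #include <stdbool.h>
  #include <stdio.h>

  /* Illustrative stand-in, not driver code: how many Tx descriptors can be
   * reported to the CQ when the descriptor one slot behind next_to_use
   * carried the last RS bit and dd_set says whether HW marked it done.
   * Works for any ring size, power of 2 or not. */
  static unsigned int completed_frames(unsigned int next_to_use,
                                       unsigned int next_to_clean,
                                       unsigned int ring_size, bool dd_set)
  {
          unsigned int last_rs = next_to_use ? next_to_use - 1 : ring_size - 1;

          if (!dd_set)
                  return 0;

          return last_rs >= next_to_clean ?
                 last_rs - next_to_clean + 1 :
                 last_rs + ring_size - next_to_clean + 1;
  }

  int main(void)
  {
          /* a single sent frame is reported right away instead of waiting
           * for a quarter-ring worth of descriptors */
          printf("%u\n", completed_frames(1, 0, 4096, true));    /* 1 */
          printf("%u\n", completed_frames(2, 4094, 4096, true)); /* 4, wraps */
          return 0;
  }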

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: xsk: drop power of 2 ring size restriction for AF_XDP
  ice: xsk: change batched Tx descriptor cleaning
====================

Link: https://lore.kernel.org/r/20220927164112.4011983-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 3e1308a7c8
Jakub Kicinski <kuba@kernel.org>, 2022-09-28 19:04:43 -07:00
3 changed files with 72 additions and 102 deletions

--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1467,7 +1467,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
                 bool wd;
 
                 if (tx_ring->xsk_pool)
-                        wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget);
+                        wd = ice_xmit_zc(tx_ring);
                 else if (ice_ring_is_xdp(tx_ring))
                         wd = true;
                 else

--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -392,13 +392,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
                 goto failure;
         }
 
-        if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
-            !is_power_of_2(vsi->tx_rings[qid]->count)) {
-                netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
-                pool_failure = -EINVAL;
-                goto failure;
-        }
-
         if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
 
         if (if_running) {
@@ -534,11 +527,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
 bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
 {
         u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
-        u16 batched, leftover, i, tail_bumps;
+        u16 leftover, i, tail_bumps;
 
-        batched = ALIGN_DOWN(count, rx_thresh);
-        tail_bumps = batched / rx_thresh;
-        leftover = count & (rx_thresh - 1);
+        tail_bumps = count / rx_thresh;
+        leftover = count - (tail_bumps * rx_thresh);
 
         for (i = 0; i < tail_bumps; i++)
                 if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
@@ -788,69 +780,57 @@ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
 }
 
 /**
- * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
- * @xdp_ring: XDP ring to clean
- * @napi_budget: amount of descriptors that NAPI allows us to clean
- *
- * Returns count of cleaned descriptors
+ * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
+ * @xdp_ring: XDP Tx ring
  */
-static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
+static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
 {
-        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
-        int budget = napi_budget / tx_thresh;
-        u16 next_dd = xdp_ring->next_dd;
-        u16 ntc, cleared_dds = 0;
+        u16 ntc = xdp_ring->next_to_clean;
+        struct ice_tx_desc *tx_desc;
+        u16 cnt = xdp_ring->count;
+        struct ice_tx_buf *tx_buf;
+        u16 xsk_frames = 0;
+        u16 last_rs;
+        int i;
 
-        do {
-                struct ice_tx_desc *next_dd_desc;
-                u16 desc_cnt = xdp_ring->count;
-                struct ice_tx_buf *tx_buf;
-                u32 xsk_frames;
-                u16 i;
-
-                next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
-                if (!(next_dd_desc->cmd_type_offset_bsz &
-                      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
-                        break;
-
-                cleared_dds++;
-                xsk_frames = 0;
-                if (likely(!xdp_ring->xdp_tx_active)) {
-                        xsk_frames = tx_thresh;
-                        goto skip;
-                }
-
-                ntc = xdp_ring->next_to_clean;
-
-                for (i = 0; i < tx_thresh; i++) {
-                        tx_buf = &xdp_ring->tx_buf[ntc];
-
-                        if (tx_buf->raw_buf) {
-                                ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
-                                tx_buf->raw_buf = NULL;
-                        } else {
-                                xsk_frames++;
-                        }
-
-                        ntc++;
-                        if (ntc >= xdp_ring->count)
-                                ntc = 0;
-                }
+        last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
+        tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
+        if ((tx_desc->cmd_type_offset_bsz &
+            cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
+                if (last_rs >= ntc)
+                        xsk_frames = last_rs - ntc + 1;
+                else
+                        xsk_frames = last_rs + cnt - ntc + 1;
+        }
+
+        if (!xsk_frames)
+                return;
+
+        if (likely(!xdp_ring->xdp_tx_active))
+                goto skip;
+
+        ntc = xdp_ring->next_to_clean;
+        for (i = 0; i < xsk_frames; i++) {
+                tx_buf = &xdp_ring->tx_buf[ntc];
+
+                if (tx_buf->raw_buf) {
+                        ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+                        tx_buf->raw_buf = NULL;
+                } else {
+                        xsk_frames++;
+                }
+
+                ntc++;
+                if (ntc >= xdp_ring->count)
+                        ntc = 0;
+        }
 skip:
-                xdp_ring->next_to_clean += tx_thresh;
-                if (xdp_ring->next_to_clean >= desc_cnt)
-                        xdp_ring->next_to_clean -= desc_cnt;
-                if (xsk_frames)
-                        xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
-                next_dd_desc->cmd_type_offset_bsz = 0;
-                next_dd = next_dd + tx_thresh;
-                if (next_dd >= desc_cnt)
-                        next_dd = tx_thresh - 1;
-        } while (--budget);
-
-        xdp_ring->next_dd = next_dd;
-
-        return cleared_dds * tx_thresh;
+        tx_desc->cmd_type_offset_bsz = 0;
+        xdp_ring->next_to_clean += xsk_frames;
+        if (xdp_ring->next_to_clean >= cnt)
+                xdp_ring->next_to_clean -= cnt;
+        if (xsk_frames)
+                xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
 }
 
 /**
@@ -885,7 +865,6 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
 static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
                                unsigned int *total_bytes)
 {
-        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
         u16 ntu = xdp_ring->next_to_use;
         struct ice_tx_desc *tx_desc;
         u32 i;
@@ -905,13 +884,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
         }
 
         xdp_ring->next_to_use = ntu;
-
-        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-                tx_desc->cmd_type_offset_bsz |=
-                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-                xdp_ring->next_rs += tx_thresh;
-        }
 }
 
 /**
@@ -924,7 +896,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
 static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
                                 u32 nb_pkts, unsigned int *total_bytes)
 {
-        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
         u32 batched, leftover, i;
 
         batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
@@ -933,54 +904,54 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d
                 ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
         for (; i < batched + leftover; i++)
                 ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
-
-        if (xdp_ring->next_to_use > xdp_ring->next_rs) {
-                struct ice_tx_desc *tx_desc;
-
-                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-                tx_desc->cmd_type_offset_bsz |=
-                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-                xdp_ring->next_rs += tx_thresh;
-        }
 }
 
 /**
+ * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ */
+static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
+{
+        u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+        struct ice_tx_desc *tx_desc;
+
+        tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+        tx_desc->cmd_type_offset_bsz |=
+                cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+}
+
+/**
  * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
- * @budget: number of free descriptors on HW Tx ring that can be used
- * @napi_budget: amount of descriptors that NAPI allows us to clean
  *
  * Returns true if there is no more work that needs to be done, false otherwise
  */
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
 {
         struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
-        u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
         u32 nb_pkts, nb_processed = 0;
         unsigned int total_bytes = 0;
+        int budget;
+
+        ice_clean_xdp_irq_zc(xdp_ring);
 
-        if (budget < tx_thresh)
-                budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+        budget = ICE_DESC_UNUSED(xdp_ring);
+        budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
 
         nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
         if (!nb_pkts)
                 return true;
 
         if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
-                struct ice_tx_desc *tx_desc;
-
                 nb_processed = xdp_ring->count - xdp_ring->next_to_use;
                 ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
-                tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
-                tx_desc->cmd_type_offset_bsz |=
-                        cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
-                xdp_ring->next_rs = tx_thresh - 1;
                 xdp_ring->next_to_use = 0;
         }
 
         ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
                             &total_bytes);
 
+        ice_set_rs_bit(xdp_ring);
         ice_xdp_ring_update_tail(xdp_ring);
         ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
@@ -1058,14 +1029,16 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
  */
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
 {
-        u16 count_mask = rx_ring->count - 1;
         u16 ntc = rx_ring->next_to_clean;
         u16 ntu = rx_ring->next_to_use;
 
-        for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
+        while (ntc != ntu) {
                 struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
 
                 xsk_buff_free(xdp);
+                ntc++;
+                if (ntc >= rx_ring->count)
+                        ntc = 0;
         }
 }

--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -26,13 +26,10 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
 void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
 int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
 #else
-static inline bool
-ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
-            u32 __always_unused budget,
-            int __always_unused napi_budget)
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
 {
         return false;
 }