Merge branch 'xdp-grow-tail'

Jesper Dangaard Brouer says:

====================
V4:
- Fixup checkpatch.pl issues
- Collected more ACKs

V3:
- Fix issue on virtio_net patch spotted by Jason Wang
- Adjust name for variable in mlx5 patch
- Collected more ACKs

V2:
- Fix bug in mlx5 for XDP_PASS case
- Collected nitpicks and ACKs from mailing list

V1:
- Fix bug in dpaa2

XDP have evolved to support several frame sizes, but xdp_buff was not
updated with this information. This have caused the side-effect that
XDP frame data hard end is unknown. This have limited the BPF-helper
bpf_xdp_adjust_tail to only shrink the packet. This patchset address
this and add packet tail extend/grow.

The purpose of the patchset is ALSO to reserve a memory area that can be
used for storing extra information, specifically for extending XDP with
multi-buffer support. One proposal is to use same layout as
skb_shared_info, which is why this area is currently 320 bytes.

When converting xdp_frame to SKB (veth and cpumap), the full tailroom
area can now be used and SKB truesize is now correct. For most
drivers this result in a much larger tailroom in SKB "head" data
area. The network stack can now take advantage of this when doing SKB
coalescing. Thus, a good driver test is to use xdp_redirect_cpu from
samples/bpf/ and do some TCP stream testing.

Use-cases for tail grow/extend:
(1) IPsec / XFRM needs a tail extend[1][2].
(2) DNS-cache responses in XDP.
(3) HAProxy ALOHA would need it to convert to XDP.
(4) Add tail info e.g. timestamp and collect via tcpdump

[1] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf
[2] http://vger.kernel.org/netconf2019.html

Examples on howto access the tail area of an XDP packet is shown in the
XDP-tutorial example[3].

[3] https://github.com/xdp-project/xdp-tutorial/blob/master/experiment01-tailgrow/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2020-05-14 21:21:57 -07:00
commit 5cc5924d83
43 changed files with 452 additions and 116 deletions

View file

@ -1606,6 +1606,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
"%s qid %d\n", __func__, rx_ring->qid);
res_budget = budget;
xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = ENA_PAGE_SIZE;
do {
xdp_verdict = XDP_PASS;

View file

@ -151,8 +151,9 @@
* The buffer size we share with the device is defined to be ENA_PAGE_SIZE
*/
#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
VLAN_HLEN - XDP_PACKET_HEADROOM)
#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
VLAN_HLEN - XDP_PACKET_HEADROOM - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))

View file

@ -138,6 +138,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = *data_ptr + *len;
xdp.rxq = &rxr->xdp_rxq;
xdp.frame_sz = PAGE_SIZE; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */
orig_data = xdp.data;
rcu_read_lock();

View file

@ -552,6 +552,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
xdp.rxq = &rq->xdp_rxq;
xdp.frame_sz = RCV_FRAG_LEN + XDP_PACKET_HEADROOM;
orig_data = xdp.data;
rcu_read_lock();

View file

@ -331,6 +331,9 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
xdp_set_data_meta_invalid(&xdp);
xdp.rxq = &ch->xdp_rxq;
xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE -
(dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM);
xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
/* xdp.data pointer may have changed */
@ -366,7 +369,11 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
dma_unmap_page(priv->net_dev->dev.parent, addr,
DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
ch->buf_count--;
/* Allow redirect use of full headroom */
xdp.data_hard_start = vaddr;
xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE;
err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
if (unlikely(err))
ch->stats.xdp_drop++;

View file

@ -1507,6 +1507,22 @@ static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = i40e_rx_offset(rx_ring) ?
SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
@ -2246,13 +2262,11 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
rx_buffer->page_offset += truesize;
#endif
}
@ -2335,6 +2349,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
bool failure = false;
struct xdp_buff xdp;
#if (PAGE_SIZE < 8192)
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
#endif
xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < (unsigned int)budget)) {
@ -2389,7 +2406,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
#endif
skb = i40e_run_xdp(rx_ring, &xdp);
}

View file

@ -531,12 +531,14 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *bi;

View file

@ -423,6 +423,22 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
return 0;
}
static unsigned int ice_rx_frame_truesize(struct ice_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ice_rx_offset(rx_ring) ?
SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
/**
* ice_run_xdp - Executes an XDP program on initialized xdp_buff
* @rx_ring: Rx ring
@ -991,6 +1007,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
bool failure;
xdp.rxq = &rx_ring->xdp_rxq;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0);
#endif
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
@ -1038,6 +1058,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
@ -1051,16 +1075,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
if (!xdp_res)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = ice_rx_pg_size(rx_ring) / 2;
#else
truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
size);
#endif
xdp_xmit |= xdp_res;
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
} else {
rx_buf->pagecnt_bias++;
}

View file

@ -840,11 +840,13 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_xmit = 0;
bool failure = false;
struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;

View file

@ -2244,19 +2244,30 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
return ERR_PTR(-result);
}
static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *rx_buffer,
unsigned int size)
{
unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
SKB_DATA_ALIGN(size);
rx_buffer->page_offset += truesize;
#endif
}
@ -2290,6 +2301,11 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
xdp.rxq = &rx_ring->xdp_rxq;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0);
#endif
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *rx_buffer;
@ -2323,7 +2339,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}

View file

@ -431,12 +431,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct ixgbe_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;

View file

@ -1095,19 +1095,31 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
return ERR_PTR(-result);
}
static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
SKB_DATA_ALIGN(size);
rx_buffer->page_offset += truesize;
#endif
}
@ -1125,6 +1137,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
xdp.rxq = &rx_ring->xdp_rxq;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0);
#endif
while (likely(total_rx_packets < budget)) {
struct ixgbevf_rx_buffer *rx_buffer;
union ixgbe_adv_rx_desc *rx_desc;
@ -1157,7 +1174,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
xdp.data_hard_start = xdp.data -
ixgbevf_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
}

View file

@ -2148,12 +2148,17 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
struct bpf_prog *prog, struct xdp_buff *xdp,
struct mvneta_stats *stats)
{
unsigned int len;
unsigned int len, sync;
struct page *page;
u32 ret, act;
len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
act = bpf_prog_run_xdp(prog, xdp);
/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
sync = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
sync = max(sync, len);
switch (act) {
case XDP_PASS:
stats->xdp_pass++;
@ -2164,9 +2169,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
err = xdp_do_redirect(pp->dev, xdp, prog);
if (unlikely(err)) {
ret = MVNETA_XDP_DROPPED;
page_pool_put_page(rxq->page_pool,
virt_to_head_page(xdp->data), len,
true);
page = virt_to_head_page(xdp->data);
page_pool_put_page(rxq->page_pool, page, sync, true);
} else {
ret = MVNETA_XDP_REDIR;
stats->xdp_redirect++;
@ -2175,10 +2179,10 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
}
case XDP_TX:
ret = mvneta_xdp_xmit_back(pp, xdp);
if (ret != MVNETA_XDP_TX)
page_pool_put_page(rxq->page_pool,
virt_to_head_page(xdp->data), len,
true);
if (ret != MVNETA_XDP_TX) {
page = virt_to_head_page(xdp->data);
page_pool_put_page(rxq->page_pool, page, sync, true);
}
break;
default:
bpf_warn_invalid_xdp_action(act);
@ -2187,8 +2191,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
trace_xdp_exception(pp->dev, prog, act);
/* fall through */
case XDP_DROP:
page_pool_put_page(rxq->page_pool,
virt_to_head_page(xdp->data), len, true);
page = virt_to_head_page(xdp->data);
page_pool_put_page(rxq->page_pool, page, sync, true);
ret = MVNETA_XDP_DROPPED;
stats->xdp_drop++;
break;
@ -2320,6 +2324,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
rcu_read_lock();
xdp_prog = READ_ONCE(pp->xdp_prog);
xdp_buf.rxq = &rxq->xdp_rxq;
xdp_buf.frame_sz = PAGE_SIZE;
/* Fairness NAPI loop */
while (rx_proc < budget && rx_proc < rx_todo) {

View file

@ -51,7 +51,8 @@
#include "en_port.h"
#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
XDP_PACKET_HEADROOM))
XDP_PACKET_HEADROOM - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))))
int mlx4_en_setup_tc(struct net_device *dev, u8 up)
{

View file

@ -683,6 +683,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog);
xdp.rxq = &ring->xdp_rxq;
xdp.frame_sz = priv->frag_info[0].frag_stride;
doorbell_pending = 0;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx

View file

@ -625,6 +625,7 @@ struct mlx5e_rq {
struct {
u16 umem_headroom;
u16 headroom;
u32 frame0_sz;
u8 map_dir; /* dma map direction */
} buff;

View file

@ -137,6 +137,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
if (xsk)
xdp.handle = di->xsk.handle;
xdp.rxq = &rq->xdp_rxq;
xdp.frame_sz = rq->buff.frame0_sz;
act = bpf_prog_run_xdp(prog, &xdp);
if (xsk) {

View file

@ -462,6 +462,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
goto err_rq_wq_destroy;
@ -485,6 +487,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
rq->wqe.info = rqp->frags_info;
rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
rq->wqe.frags =
kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
(wq_sz << rq->wqe.info.log_num_frags)),
@ -522,6 +526,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
if (xsk) {
rq->buff.frame0_sz = xsk_umem_xdp_frame_sz(umem);
err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
if (unlikely(err)) {
mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",

View file

@ -1070,6 +1070,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
if (consumed)
return NULL; /* page/packet was consumed by XDP */
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
if (unlikely(!skb))
return NULL;
@ -1371,6 +1372,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
return NULL; /* page/packet was consumed by XDP */
}
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
if (unlikely(!skb))
return NULL;

View file

@ -1741,10 +1741,15 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
unsigned int pkt_len, bool *completed)
{
unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
struct nfp_net_tx_buf *txbuf;
struct nfp_net_tx_desc *txd;
int wr_idx;
/* Reject if xdp_adjust_tail grow packet beyond DMA area */
if (pkt_len + dma_off > dma_map_sz)
return false;
if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
if (!*completed) {
nfp_net_xdp_complete(tx_ring);
@ -1817,6 +1822,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
rcu_read_lock();
xdp_prog = READ_ONCE(dp->xdp_prog);
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
xdp.frame_sz = PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM;
xdp.rxq = &rx_ring->xdp_rxq;
tx_ring = r_vec->xdp_ring;

View file

@ -1066,6 +1066,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.rxq = &rxq->xdp_rxq;
xdp.frame_sz = rxq->rx_buf_seg_size; /* PAGE_SIZE when XDP enabled */
/* Queues always have a full reset currently, so for the time
* being until there's atomic program replace just mark read

View file

@ -1476,7 +1476,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
if (rxq->rx_buf_size + size > PAGE_SIZE)
rxq->rx_buf_size = PAGE_SIZE - size;
/* Segment size to spilt a page in multiple equal parts ,
/* Segment size to split a page in multiple equal parts,
* unless XDP is used in which case we'd use the entire page.
*/
if (!edev->xdp_prog) {

View file

@ -308,6 +308,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + rx_buf->len;
xdp.rxq = &rx_queue->xdp_rxq_info;
xdp.frame_sz = efx->rx_page_buf_step;
xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
rcu_read_unlock();

View file

@ -884,23 +884,28 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
struct xdp_buff *xdp)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
unsigned int len = xdp->data_end - xdp->data;
unsigned int sync, len = xdp->data_end - xdp->data;
u32 ret = NETSEC_XDP_PASS;
struct page *page;
int err;
u32 act;
act = bpf_prog_run_xdp(prog, xdp);
/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
sync = xdp->data_end - xdp->data_hard_start - NETSEC_RXBUF_HEADROOM;
sync = max(sync, len);
switch (act) {
case XDP_PASS:
ret = NETSEC_XDP_PASS;
break;
case XDP_TX:
ret = netsec_xdp_xmit_back(priv, xdp);
if (ret != NETSEC_XDP_TX)
page_pool_put_page(dring->page_pool,
virt_to_head_page(xdp->data), len,
true);
if (ret != NETSEC_XDP_TX) {
page = virt_to_head_page(xdp->data);
page_pool_put_page(dring->page_pool, page, sync, true);
}
break;
case XDP_REDIRECT:
err = xdp_do_redirect(priv->ndev, xdp, prog);
@ -908,9 +913,8 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
ret = NETSEC_XDP_REDIR;
} else {
ret = NETSEC_XDP_CONSUMED;
page_pool_put_page(dring->page_pool,
virt_to_head_page(xdp->data), len,
true);
page = virt_to_head_page(xdp->data);
page_pool_put_page(dring->page_pool, page, sync, true);
}
break;
default:
@ -921,8 +925,8 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
/* fall through -- handle aborts by dropping packet */
case XDP_DROP:
ret = NETSEC_XDP_CONSUMED;
page_pool_put_page(dring->page_pool,
virt_to_head_page(xdp->data), len, true);
page = virt_to_head_page(xdp->data);
page_pool_put_page(dring->page_pool, page, sync, true);
break;
}
@ -936,10 +940,14 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
struct netsec_rx_pkt_info rx_info;
enum dma_data_direction dma_dir;
struct bpf_prog *xdp_prog;
struct xdp_buff xdp;
u16 xdp_xmit = 0;
u32 xdp_act = 0;
int done = 0;
xdp.rxq = &dring->xdp_rxq;
xdp.frame_sz = PAGE_SIZE;
rcu_read_lock();
xdp_prog = READ_ONCE(priv->xdp_prog);
dma_dir = page_pool_get_dma_dir(dring->page_pool);
@ -953,7 +961,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
struct sk_buff *skb = NULL;
u16 pkt_len, desc_len;
dma_addr_t dma_handle;
struct xdp_buff xdp;
void *buf_addr;
if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
@ -1002,7 +1009,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + pkt_len;
xdp.rxq = &dring->xdp_rxq;
if (xdp_prog) {
xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);

View file

@ -406,6 +406,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
xdp.data_hard_start = pa;
xdp.rxq = &priv->xdp_rxq[ch];
xdp.frame_sz = PAGE_SIZE;
port = priv->emac_port + cpsw->data.dual_emac;
ret = cpsw_run_xdp(priv, ch, &xdp, page, port);

View file

@ -348,6 +348,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
xdp.data_hard_start = pa;
xdp.rxq = &priv->xdp_rxq[ch];
xdp.frame_sz = PAGE_SIZE;
ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port);
if (ret != CPSW_XDP_PASS)

View file

@ -49,6 +49,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
xdp_set_data_meta_invalid(xdp);
xdp->data_end = xdp->data + len;
xdp->rxq = &nvchan->xdp_rxq;
xdp->frame_sz = PAGE_SIZE;
xdp->handle = 0;
memcpy(xdp->data, data, len);

View file

@ -795,7 +795,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
if (xbuf) {
unsigned int hdroom = xdp->data - xdp->data_hard_start;
unsigned int xlen = xdp->data_end - xdp->data;
unsigned int frag_size = netvsc_xdp_fraglen(hdroom + xlen);
unsigned int frag_size = xdp->frame_sz;
skb = build_skb(xbuf, frag_size);

View file

@ -1671,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
xdp.rxq = &tfile->xdp_rxq;
xdp.frame_sz = buflen;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act == XDP_REDIRECT || act == XDP_TX) {
@ -2411,6 +2412,7 @@ static int tun_xdp_one(struct tun_struct *tun,
}
xdp_set_data_meta_invalid(xdp);
xdp->rxq = &tfile->xdp_rxq;
xdp->frame_sz = buflen;
act = bpf_prog_run_xdp(xdp_prog, xdp);
err = tun_xdp_act(tun, xdp_prog, xdp, act);

View file

@ -405,10 +405,6 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
{
struct sk_buff *skb;
if (!buflen) {
buflen = SKB_DATA_ALIGN(headroom + len) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}
skb = build_skb(head, buflen);
if (!skb)
return NULL;
@ -564,13 +560,15 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
struct veth_stats *stats)
{
void *hard_start = frame->data - frame->headroom;
void *head = hard_start - sizeof(struct xdp_frame);
int len = frame->len, delta = 0;
struct xdp_frame orig_frame;
struct bpf_prog *xdp_prog;
unsigned int headroom;
struct sk_buff *skb;
/* bpf_xdp_adjust_head() assures BPF cannot access xdp_frame area */
hard_start -= sizeof(struct xdp_frame);
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (likely(xdp_prog)) {
@ -581,6 +579,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
xdp.data = frame->data;
xdp.data_end = frame->data + frame->len;
xdp.data_meta = frame->data - frame->metasize;
xdp.frame_sz = frame->frame_sz;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@ -592,7 +591,6 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
break;
case XDP_TX:
orig_frame = *frame;
xdp.data_hard_start = head;
xdp.rxq->mem = frame->mem;
if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
@ -605,7 +603,6 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
xdp.data_hard_start = head;
xdp.rxq->mem = frame->mem;
if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
frame = &orig_frame;
@ -629,7 +626,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
rcu_read_unlock();
headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
skb = veth_build_skb(head, headroom, len, 0);
skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz);
if (!skb) {
xdp_return_frame(frame);
stats->rx_drops++;
@ -695,9 +692,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
goto drop;
}
nskb = veth_build_skb(head,
VETH_XDP_HEADROOM + mac_len, skb->len,
PAGE_SIZE);
nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
skb->len, PAGE_SIZE);
if (!nskb) {
page_frag_free(head);
goto drop;
@ -715,6 +711,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
xdp.data_end = xdp.data + pktlen;
xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
/* SKB "head" area always have tailroom for skb_shared_info */
xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start;
xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
orig_data = xdp.data;
orig_data_end = xdp.data_end;
@ -758,6 +759,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
}
rcu_read_unlock();
/* check if bpf_xdp_adjust_head was used */
delta = orig_data - xdp.data;
off = mac_len + delta;
if (off > 0)
@ -765,9 +767,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
else if (off < 0)
__skb_pull(skb, -off);
skb->mac_header -= delta;
/* check if bpf_xdp_adjust_tail was used */
off = xdp.data_end - orig_data_end;
if (off != 0)
__skb_put(skb, off);
__skb_put(skb, off); /* positive on grow, negative on shrink */
skb->protocol = eth_type_trans(skb, rq->dev);
metalen = xdp.data - xdp.data_meta;

View file

@ -689,6 +689,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp.data_end = xdp.data + len;
xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
xdp.frame_sz = buflen;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
@ -797,10 +798,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
int offset = buf - page_address(page);
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
int err;
unsigned int metasize = 0;
unsigned int frame_sz;
int err;
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
@ -821,6 +823,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
/* Buffers with headroom use PAGE_SIZE as alloc size,
* see add_recvbuf_mergeable() + get_mergeable_buf_len()
*/
frame_sz = headroom ? PAGE_SIZE : truesize;
/* This happens when rx buffer size is underestimated
* or headroom is not enough because of the buffer
* was refilled before XDP is set. This should only
@ -834,6 +841,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
page, offset,
VIRTIO_XDP_HEADROOM,
&len);
frame_sz = PAGE_SIZE;
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
@ -850,6 +859,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdp.data_end = xdp.data + (len - vi->hdr_len);
xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
xdp.frame_sz = frame_sz - vi->hdr_len;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
@ -924,7 +934,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
rcu_read_unlock();
truesize = mergeable_ctx_to_truesize(ctx);
if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)ctx);

View file

@ -747,6 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
xdp->data = buf + pad;
xdp->data_end = xdp->data + len;
hdr->buflen = buflen;
xdp->frame_sz = buflen;
--net->refcnt_bias;
alloc_frag->offset += buflen;

View file

@ -6,6 +6,8 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
#include <linux/skbuff.h> /* skb_shared_info */
/**
* DOC: XDP RX-queue information
*
@ -70,13 +72,25 @@ struct xdp_buff {
void *data_hard_start;
unsigned long handle;
struct xdp_rxq_info *rxq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
};
/* Reserve memory area at end-of data area.
*
* This macro reserves tailroom in the XDP buffer by limiting the
* XDP/BPF data access to data_hard_end. Notice same area (and size)
* is used for XDP_PASS, when constructing the SKB via build_skb().
*/
#define xdp_data_hard_end(xdp) \
((xdp)->data_hard_start + (xdp)->frame_sz - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
struct xdp_frame {
void *data;
u16 len;
u16 headroom;
u16 metasize;
u32 metasize:8;
u32 frame_sz:24;
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
* while mem info is valid on remote CPU.
*/
@ -91,6 +105,10 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
frame->dev_rx = NULL;
}
/* Avoids inlining WARN macro in fast-path */
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
/* Convert xdp_buff to xdp_frame */
@ -111,6 +129,12 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
return NULL;
/* Catch if driver didn't reserve tailroom for skb_shared_info */
if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
XDP_WARN("Driver BUG: missing reserved tailroom");
return NULL;
}
/* Store info in top of packet */
xdp_frame = xdp->data_hard_start;
@ -118,6 +142,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
xdp_frame->len = xdp->data_end - xdp->data;
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
xdp_frame->frame_sz = xdp->frame_sz;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;

View file

@ -236,6 +236,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
else
return address + offset;
}
static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
{
return umem->chunk_size_nohr + umem->headroom;
}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@ -366,6 +372,11 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
{
return 0;
}
static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -EOPNOTSUPP;

View file

@ -2015,8 +2015,8 @@ union bpf_attr {
* int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
* only possible to shrink the packet as of this writing,
* therefore *delta* must be a negative integer.
* possible to both shrink and grow the packet tail.
* Shrink done via *delta* being a negative integer.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers

View file

@ -162,25 +162,10 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
/* Part of headroom was reserved to xdpf */
hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
/* build_skb need to place skb_shared_info after SKB end, and
* also want to know the memory "truesize". Thus, need to
* know the memory frame size backing xdp_buff.
*
* XDP was designed to have PAGE_SIZE frames, but this
* assumption is not longer true with ixgbe and i40e. It
* would be preferred to set frame_size to 2048 or 4096
* depending on the driver.
* frame_size = 2048;
* frame_len = frame_size - sizeof(*xdp_frame);
*
* Instead, with info avail, skb_shared_info in placed after
* packet len. This, unfortunately fakes the truesize.
* Another disadvantage of this approach, the skb_shared_info
* is not at a fixed memory location, with mixed length
* packets, which is bad for cache-line hotness.
/* Memory size backing xdp_frame data already have reserved
* room for build_skb to place skb_shared_info in tailroom.
*/
frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
frame_size = xdpf->frame_sz;
pkt_data_start = xdpf->data - hard_start_headroom;
skb = build_skb_around(skb, pkt_data_start, frame_size);

View file

@ -470,25 +470,34 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
u32 headroom = XDP_PACKET_HEADROOM;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
u32 max_data_sz;
void *data;
int ret;
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
/* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
if (size > max_data_sz)
return -EINVAL;
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
if (IS_ERR(data))
return PTR_ERR(data);
xdp.data_hard_start = data;
xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
xdp.data = data + headroom;
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
xdp.frame_sz = headroom + max_data_sz + tailroom;
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
@ -496,8 +505,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
xdp.data_end != xdp.data + size)
if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
out:

View file

@ -4617,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + hlen;
xdp->data_hard_start = skb->data - skb_headroom(skb);
/* SKB "head" area always have tailroom for skb_shared_info */
xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
@ -4640,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb_reset_network_header(skb);
}
/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
* pckt.
*/
off = orig_data_end - xdp->data_end;
/* check if bpf_xdp_adjust_tail was used */
off = xdp->data_end - orig_data_end;
if (off != 0) {
skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
skb->len -= off;
skb->len += off; /* positive on grow, negative on shrink */
}
/* check if XDP changed eth hdr such SKB needs update */

View file

@ -3411,15 +3411,26 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
/* only shrinking is allowed for now. */
if (unlikely(offset >= 0))
/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
/* ALL drivers MUST init xdp->frame_sz, chicken check below */
if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
return -EINVAL;
}
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
/* Clear memory area on grow, can contain uninit kernel memory */
if (offset > 0)
memset(xdp->data_end, 0, offset);
xdp->data_end = data_end;
return 0;

View file

@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool.h>
#include <net/xdp.h>
@ -496,3 +497,10 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
void xdp_warn(const char *msg, const char *func, const int line)
{
WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
};
EXPORT_SYMBOL_GPL(xdp_warn);

View file

@ -2,13 +2,13 @@
#include <test_progs.h>
#include <network_helpers.h>
void test_xdp_adjust_tail(void)
void test_xdp_adjust_tail_shrink(void)
{
const char *file = "./test_adjust_tail.o";
const char *file = "./test_xdp_adjust_tail_shrink.o";
__u32 duration, retval, size, expect_sz;
struct bpf_object *obj;
char buf[128];
__u32 duration, retval, size;
int err, prog_fd;
char buf[128];
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
@ -21,10 +21,121 @@ void test_xdp_adjust_tail(void)
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || retval != XDP_TX || size != 54,
"ipv6", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
CHECK(err || retval != XDP_TX || size != expect_sz,
"ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
err, errno, retval, size, expect_sz);
bpf_object__close(obj);
}
void test_xdp_adjust_tail_grow(void)
{
const char *file = "./test_xdp_adjust_tail_grow.o";
struct bpf_object *obj;
char buf[4096]; /* avoid segfault: large buf to hold grow results */
__u32 duration, retval, size, expect_sz;
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || retval != XDP_DROP,
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
buf, &size, &retval, &duration);
CHECK(err || retval != XDP_TX || size != expect_sz,
"ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
err, errno, retval, size, expect_sz);
bpf_object__close(obj);
}
void test_xdp_adjust_tail_grow2(void)
{
const char *file = "./test_xdp_adjust_tail_grow.o";
char buf[4096]; /* avoid segfault: large buf to hold grow results */
int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
struct bpf_object *obj;
int err, cnt, i;
int max_grow;
struct bpf_prog_test_run_attr tattr = {
.repeat = 1,
.data_in = &buf,
.data_out = &buf,
.data_size_in = 0, /* Per test */
.data_size_out = 0, /* Per test */
};
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
return;
/* Test case-64 */
memset(buf, 1, sizeof(buf));
tattr.data_size_in = 64; /* Determine test case via pkt size */
tattr.data_size_out = 128; /* Limit copy_size */
/* Kernel side alloc packet memory area that is zero init */
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
|| tattr.retval != XDP_TX
|| tattr.data_size_out != 192, /* Expected grow size */
"case-64",
"err %d errno %d retval %d size %d\n",
err, errno, tattr.retval, tattr.data_size_out);
/* Extra checks for data contents */
CHECK_ATTR(tattr.data_size_out != 192
|| buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
|| buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
|| buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
"case-64-data",
"err %d errno %d retval %d size %d\n",
err, errno, tattr.retval, tattr.data_size_out);
/* Test case-128 */
memset(buf, 2, sizeof(buf));
tattr.data_size_in = 128; /* Determine test case via pkt size */
tattr.data_size_out = sizeof(buf); /* Copy everything */
err = bpf_prog_test_run_xattr(&tattr);
max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
CHECK_ATTR(err
|| tattr.retval != XDP_TX
|| tattr.data_size_out != max_grow,/* Expect max grow size */
"case-128",
"err %d errno %d retval %d size %d expect-size %d\n",
err, errno, tattr.retval, tattr.data_size_out, max_grow);
/* Extra checks for data content: Count grow size, will contain zeros */
for (i = 0, cnt = 0; i < sizeof(buf); i++) {
if (buf[i] == 0)
cnt++;
}
CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
|| tattr.data_size_out != max_grow, /* Total grow size */
"case-128-data",
"err %d errno %d retval %d size %d grow-size %d\n",
err, errno, tattr.retval, tattr.data_size_out, cnt);
bpf_object__close(obj);
}
void test_xdp_adjust_tail(void)
{
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
test_xdp_adjust_tail_grow();
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
}

View file

@ -0,0 +1,33 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
SEC("xdp_adjust_tail_grow")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
unsigned int data_len;
int offset = 0;
/* Data length determine test case */
data_len = data_end - data;
if (data_len == 54) { /* sizeof(pkt_v4) */
offset = 4096; /* test too large offset */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
offset = 128;
} else if (data_len == 128) {
offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
} else {
return XDP_ABORTED; /* No matching test */
}
if (bpf_xdp_adjust_tail(xdp, offset))
return XDP_DROP;
return XDP_TX;
}
char _license[] SEC("license") = "GPL";

View file

@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -11,15 +11,15 @@
int _version SEC("version") = 1;
SEC("xdp_adjust_tail")
int _xdp_adjust_tail(struct xdp_md *xdp)
SEC("xdp_adjust_tail_shrink")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
int offset = 0;
if (data_end - data == 54)
offset = 256;
if (data_end - data == 54) /* sizeof(pkt_v4) */
offset = 256; /* shrink too much */
else
offset = 20;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))