linux-stable/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c

// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include <net/if.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_link.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <linux/udp.h>
#include <bpf/bpf_endian.h>
#include <uapi/linux/netdev.h>
#include "test_xdp_do_redirect.skel.h"
struct udp_packet {
struct ethhdr eth;
struct ipv6hdr iph;
struct udphdr udp;
__u8 payload[64 - sizeof(struct udphdr)
- sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
} __packed;
static struct udp_packet pkt_udp = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
.eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
.eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
.iph.version = 6,
.iph.nexthdr = IPPROTO_UDP,
.iph.payload_len = bpf_htons(sizeof(struct udp_packet)
- offsetof(struct udp_packet, udp)),
.iph.hop_limit = 2,
.iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
.iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
.udp.source = bpf_htons(1),
.udp.dest = bpf_htons(1),
.udp.len = bpf_htons(sizeof(struct udp_packet)
- offsetof(struct udp_packet, udp)),
.payload = {0x42}, /* receiver XDP program matches on this */
};
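/* Create a TC ingress hook on hook->ifindex and attach the given prog fd to
 * it. On attach failure the hook is destroyed again, so the caller only has
 * to clean up after a fully successful attach.
 */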
static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
{
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
int ret;
ret = bpf_tc_hook_create(hook);
if (!ASSERT_OK(ret, "create tc hook"))
return ret;
ret = bpf_tc_attach(hook, &opts);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
bpf_tc_hook_destroy(hook);
return ret;
}
return 0;
}
/* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
* 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
*/
#if defined(__s390x__)
#define MAX_PKT_SIZE 3216
#else
#define MAX_PKT_SIZE 3408
#endif
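/* Probe the size limit from both sides: a run at exactly MAX_PKT_SIZE must
 * succeed, while one extra byte must be rejected with -EINVAL.
 */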
static void test_max_pkt_size(int fd)
{
char data[MAX_PKT_SIZE + 1] = {};
int err;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
.data_size_in = MAX_PKT_SIZE,
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_OK(err, "prog_run_max_size");
opts.data_size_in += 1;
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_EQ(err, -EINVAL, "prog_run_too_big");
}
#define NUM_PKTS 10000
void test_xdp_do_redirect(void)
{
int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
char data[sizeof(pkt_udp) + sizeof(__u64)];
struct test_xdp_do_redirect *skel = NULL;
struct nstoken *nstoken = NULL;
struct bpf_link *link;
LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
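	/* Let the packet proper start sizeof(__u64) bytes into the buffer, so
	 * the u32 marker written below ends up in the headroom right in front
	 * of the Ethernet header, where the receiving XDP program checks it.
	 */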
struct xdp_md ctx_in = { .data = sizeof(__u64),
.data_end = sizeof(data) };
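	/* Run the redirecting XDP program NUM_PKTS times in live-frames mode,
	 * injecting the resulting frames into the stack in batches of 64.
	 */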
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
.data_size_in = sizeof(data),
.ctx_in = &ctx_in,
.ctx_size_in = sizeof(ctx_in),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = NUM_PKTS,
.batch_size = 64,
);
DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
.attach_point = BPF_TC_INGRESS);
memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
*((__u32 *)data) = 0x42; /* metadata test value */
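	/* Zero the second word so the whole 8-byte headroom is deterministic */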
	*((__u32 *)data + 1) = 0;
skel = test_xdp_do_redirect__open();
if (!ASSERT_OK_PTR(skel, "skel"))
return;
/* The XDP program we run with bpf_prog_run() will cycle through all
* three xmit (PASS/TX/REDIRECT) return codes starting from above, and
* ending up with PASS, so we should end up with two packets on the dst
* iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
* payload.
*/
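	/* Build an isolated test setup: a veth pair inside a fresh netns, with
	 * MAC and IPv6 addresses matching pkt_udp above, plus a static
	 * neighbour entry for fc00::2 on veth_src.
	 */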
SYS(out, "ip netns add testns");
nstoken = open_netns("testns");
if (!ASSERT_OK_PTR(nstoken, "setns"))
goto out;
SYS(out, "ip link add veth_src type veth peer name veth_dst");
SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
SYS(out, "ip link set dev veth_src up");
SYS(out, "ip link set dev veth_dst up");
SYS(out, "ip addr add dev veth_src fc00::1/64");
SYS(out, "ip addr add dev veth_dst fc00::2/64");
SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
/* We enable forwarding in the test namespace because that will cause
* the packets that go through the kernel stack (with XDP_PASS) to be
* forwarded back out the same interface (because of the packet dst
* combined with the interface addresses). When this happens, the
* regular forwarding path will end up going through the same
* veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
* deadlock if it happens on the same CPU. There's a local_bh_disable()
* in the test_run code to prevent this, but an earlier version of the
* code didn't have this, so we keep the test behaviour to make sure the
* bug doesn't resurface.
*/
SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
ifindex_src = if_nametoindex("veth_src");
ifindex_dst = if_nametoindex("veth_dst");
if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
!ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
goto out;
/* Check xdp features supported by veth driver */
err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
if (!ASSERT_OK(err, "veth_src bpf_xdp_query"))
goto out;
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG,
"veth_src query_opts.feature_flags"))
goto out;
err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
goto out;
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG,
"veth_dst query_opts.feature_flags"))
goto out;
/* Enable GRO */
SYS(out, "ethtool -K veth_src gro on");
SYS(out, "ethtool -K veth_dst gro on");
err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
goto out;
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
NETDEV_XDP_ACT_NDO_XMIT_SG,
"veth_src query_opts.feature_flags gro on"))
goto out;
err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
goto out;
if (!ASSERT_EQ(query_opts.feature_flags,
NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
NETDEV_XDP_ACT_NDO_XMIT_SG,
"veth_dst query_opts.feature_flags gro on"))
goto out;
memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
skel->rodata->ifindex_in = ifindex_src;
ctx_in.ingress_ifindex = ifindex_src;
tc_hook.ifindex = ifindex_src;
if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
goto out;
link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
if (!ASSERT_OK_PTR(link, "prog_attach"))
goto out;
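	/* Hand the link over to the skeleton so it is detached on destroy */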
skel->links.xdp_count_pkts = link;
tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
if (attach_tc_prog(&tc_hook, tc_prog_fd))
goto out;
xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
if (!ASSERT_OK(err, "prog_run"))
goto out_tc;
/* wait for the packets to be flushed */
kern_sync_rcu();
	/* There will be one packet sent through XDP_REDIRECT and one through
	 * XDP_TX; these will show up on the XDP counting program, while the
	 * rest will be counted at the TC ingress hook (the counting program
	 * resets the packet payload so they don't get counted twice even though
	 * they are re-xmitted out the veth device).
	 */
ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
test_max_pkt_size(bpf_program__fd(skel->progs.xdp_count_pkts));
out_tc:
bpf_tc_hook_destroy(&tc_hook);
out:
if (nstoken)
close_netns(nstoken);
SYS_NOFAIL("ip netns del testns");
test_xdp_do_redirect__destroy(skel);
}