linux-stable/tools/testing/selftests/bpf/prog_tests/tc_redirect.c

1150 lines
30 KiB
C

// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
* between src and dst. The netns fwd has veth links to each src and dst. The
* client is in src and server in dst. The test installs a TC BPF program to each
* host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
* neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
* switch from ingress side; it also installs a checker prog on the egress side
* to drop unexpected traffic.
*/
#include <arpa/inet.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
#include <linux/time_types.h>
#include <linux/net_tstamp.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
#include "test_tc_neigh_fib.skel.h"
#include "test_tc_neigh.skel.h"
#include "test_tc_peer.skel.h"
#include "test_tc_dtime.skel.h"
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
/* Print a diagnostic line to stderr, prefixed with the source location and
 * the textual form of the current errno. MSG must be a string literal; any
 * further arguments are consumed printf-style. A newline is appended.
 */
#define log_err(MSG, ...)						\
	(fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n",		\
		 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__))
/* NULL-terminated list of the network namespaces the test adds and deletes. */
static const char * const namespaces[] = {
	NS_SRC,
	NS_FWD,
	NS_DST,
	NULL,
};
/* Write the string newval at the start of the existing file at path.
 *
 * The file is opened with "r+", so it must already exist.
 * Returns 0 on success, -1 if the file cannot be opened or the write fails
 * (the write failure is also logged through log_err()).
 */
static int write_file(const char *path, const char *newval)
{
	FILE *fp = fopen(path, "r+");
	int rc = 0;

	if (!fp)
		return -1;

	if (fwrite(newval, strlen(newval), 1, fp) != 1) {
		log_err("writing to %s failed", path);
		rc = -1;
	}

	fclose(fp);
	return rc;
}
/* Run "ip netns <verb> <ns>" for every entry of namespaces[].
 *
 * Stops at the first command that fails (the failure is recorded through
 * ASSERT_OK) and returns -1; returns 0 when every command succeeded.
 */
static int netns_setup_namespaces(const char *verb)
{
	char cmd[128];
	int i;

	for (i = 0; namespaces[i]; i++) {
		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, namespaces[i]);
		if (!ASSERT_OK(system(cmd), cmd))
			return -1;
	}

	return 0;
}
/* Best-effort variant of netns_setup_namespaces(): runs the same command for
 * every namespace with its output discarded and its exit status ignored.
 */
static void netns_setup_namespaces_nofail(const char *verb)
{
	char cmd[128];
	int i;

	for (i = 0; namespaces[i]; i++) {
		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, namespaces[i]);
		system(cmd);
	}
}
/* Interface indexes recorded by netns_setup_links_and_routes() and handed to
 * the individual subtests.
 */
struct netns_setup_result {
/* ifindex of veth_src_fwd */
int ifindex_veth_src_fwd;
/* ifindex of veth_dst_fwd */
int ifindex_veth_dst_fwd;
};
/* Read the link-layer address of interface name from sysfs.
 *
 * Exactly IFADDR_STR_LEN bytes are read from /sys/class/net/<name>/address
 * into ifaddr; no terminator is appended, so the caller must supply a buffer
 * that is already terminated past that length.
 * Returns 0 on success, -1 if the file cannot be opened or the read is short.
 */
static int get_ifaddr(const char *name, char *ifaddr)
{
	char path[PATH_MAX];
	FILE *fp;
	int nread, rc;

	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
	fp = fopen(path, "r");
	if (!ASSERT_OK_PTR(fp, path))
		return -1;

	nread = fread(ifaddr, 1, IFADDR_STR_LEN, fp);
	rc = ASSERT_EQ(nread, IFADDR_STR_LEN, "fread ifaddr") ? 0 : -1;

	fclose(fp);
	return rc;
}
/* Read the interface index of interface name from sysfs.
 *
 * Returns the value parsed from /sys/class/net/<name>/ifindex, or -1 if the
 * file cannot be opened or nothing could be read from it.
 */
static int get_ifindex(const char *name)
{
	char path[PATH_MAX];
	char buf[32];
	FILE *f;
	int ret;

	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
	f = fopen(path, "r");
	if (!ASSERT_OK_PTR(f, path))
		return -1;

	/* fread() does not terminate the buffer, so keep one byte in reserve
	 * and terminate it explicitly before handing it to atoi().
	 */
	ret = fread(buf, 1, sizeof(buf) - 1, f);
	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
		fclose(f);
		return -1;
	}
	fclose(f);

	buf[ret] = '\0';
	return atoi(buf);
}
/* Format a shell command printf-style, run it with system() and assert that
 * it returned 0. The formatted command doubles as the assertion label.
 * On failure control jumps to a label named "fail", so every function that
 * uses SYS() has to define one.
 */
#define SYS(fmt, ...) \
({ \
char cmd[1024]; \
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
if (!ASSERT_OK(system(cmd), cmd)) \
goto fail; \
})
/* Create the two veth pairs, move each end into its namespace and configure
 * addresses, routes and static neighbour entries in ns_src, ns_fwd and ns_dst.
 *
 * The ifindexes of veth_src_fwd and veth_dst_fwd are stored in *result.
 * Returns 0 on success, -1 as soon as any step fails (SYS() jumps to "fail").
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
/* Zero-filled with one spare byte: get_ifaddr() does not terminate it. */
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
SYS("ip link set veth_dst address " MAC_DST);
/* The address and ifindexes are read from sysfs here, before the devices
 * are moved out of the current namespace.
 */
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
goto fail;
result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
if (result->ifindex_veth_dst_fwd < 0)
goto fail;
SYS("ip link set veth_src netns " NS_SRC);
SYS("ip link set veth_src_fwd netns " NS_FWD);
SYS("ip link set veth_dst_fwd netns " NS_FWD);
SYS("ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
SYS("ip link set dev veth_src up");
SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
/* Static neighbour entries: the destination resolves to veth_src_fwd's MAC. */
SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
close_netns(nstoken);
/** setup in 'fwd' namespace */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
goto fail;
/* The fwd netns automatically gets a v6 LL address / routes, but also
 * needs v4 one in order to start ARP probing. IP4_NET route is added
 * to the endpoints so that the ARP processing will reply.
 */
SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
SYS("ip link set dev veth_src_fwd up");
SYS("ip link set dev veth_dst_fwd up");
SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
close_netns(nstoken);
/** setup in 'dst' namespace */
nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
SYS("ip link set dev veth_dst up");
SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
/* Static neighbour entries: the source resolves to veth_dst_fwd's fixed MAC. */
SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
close_netns(nstoken);
return 0;
fail:
/* nstoken is NULL when the failure happened outside any namespace or when
 * open_netns() itself failed.
 */
if (nstoken)
close_netns(nstoken);
return -1;
}
/* Attach the pinned TC programs to both fwd-facing veth devices.
 *
 * Each device gets a clsact qdisc, the redirecting program (src or dst) on
 * ingress and the checker program on egress.
 * Returns 0 on success, -1 when any tc command fails.
 */
static int netns_load_bpf(void)
{
	/* veth_src_fwd */
	SYS("tc qdisc add dev veth_src_fwd clsact");
	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " SRC_PROG_PIN_FILE);
	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " CHK_PROG_PIN_FILE);

	/* veth_dst_fwd */
	SYS("tc qdisc add dev veth_dst_fwd clsact");
	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " DST_PROG_PIN_FILE);
	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " CHK_PROG_PIN_FILE);

	return 0;

fail:
	return -1;
}
/* Check TCP connectivity from ns_src to ns_dst.
 *
 * A server is started on addr:port inside ns_dst, a client connects to it
 * from inside ns_src, and a short payload is written by the client and read
 * back on the accepted socket. Every step is asserted.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto done;

	/* Switch over to the client's namespace. */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto done;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto done;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto done;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto done;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto done;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

done:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
/* Ping addr from inside ns_src using the ping binary chosen by
 * ping_command() for the given family. Returns 0 on success, -1 on failure.
 */
static int test_ping(int family, const char *addr)
{
	const char *ping = ping_command(family);

	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
	return 0;

fail:
	return -1;
}
static void test_connectivity(void)
{
test_tcp(AF_INET, IP4_DST, IP4_PORT);
test_ping(AF_INET, IP4_DST);
test_tcp(AF_INET6, IP6_DST, IP6_PORT);
test_ping(AF_INET6, IP6_DST);
}
/* Enable or disable IPv4 and IPv6 forwarding in the current network
 * namespace by writing "1" or "0" to the corresponding sysctl files.
 *
 * Returns 0 on success, or the non-zero result of the failing write_file().
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion label reflects the value actually being written. */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" : "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" : "set ipv6.forwarding=0"))
		return err;

	return 0;
}
/* Receive one message on fd and validate its payload and receive timestamp.
 *
 * The message must be exactly s bytes long and its first s bytes must match
 * expected. The SO_TIMESTAMPNS_NEW control message must carry a non-zero
 * timestamp that is not in the future and less than 5 seconds old compared
 * with CLOCK_REALTIME.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	struct __kernel_timespec pkt_ts = {};
	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
	char data[32];
	struct iovec iov = {
		.iov_base = data,
		.iov_len = sizeof(data),
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl,
		.msg_controllen = sizeof(ctl),
	};
	struct timespec now_ts;
	__u64 now_ns, pkt_ns;
	struct cmsghdr *hdr;
	int ret;

	ret = recvmsg(fd, &msg, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return;
	ASSERT_STRNEQ(data, expected, s, "expected rcv data");

	/* pkt_ts stays zeroed when the expected control message is absent,
	 * which the non-zero assertion below then reports.
	 */
	hdr = CMSG_FIRSTHDR(&msg);
	if (hdr && hdr->cmsg_level == SOL_SOCKET &&
	    hdr->cmsg_type == SO_TIMESTAMPNS_NEW)
		memcpy(&pkt_ts, CMSG_DATA(hdr), sizeof(pkt_ts));

	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
	ASSERT_OK(ret, "clock_gettime");
	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;

	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
}
static void snd_tstamp(int fd, char *b, size_t s)
{
struct sock_txtime opt = { .clockid = CLOCK_TAI };
char ctl[CMSG_SPACE(sizeof(__u64))];
struct timespec now_ts;
struct msghdr msg = {};
struct cmsghdr *cmsg;
struct iovec iov;
__u64 now_ns;
int ret;
ret = clock_gettime(CLOCK_TAI, &now_ts);
ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
iov.iov_base = b;
iov.iov_len = s;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = &ctl;
msg.msg_controllen = sizeof(ctl);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_TXTIME;
cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
*(__u64 *)CMSG_DATA(cmsg) = now_ns;
ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
ret = sendmsg(fd, &msg, 0);
ASSERT_EQ(ret, s, "sendmsg");
}
/* Send one payload from ns_src to a server in ns_dst and validate the
 * receive timestamp seen by the server.
 *
 * For SOCK_STREAM the payload is written by the client and checked on the
 * accepted socket; for other socket types it is sent with snd_tstamp() and
 * checked directly on the server socket.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int one = 1, conn_fd = -1, cli_fd = -1, srv_fd, err;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Make the kernel record the (rcv) timestamp for every skb. */
	err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
			 &one, sizeof(one));
	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto done;

	if (type == SOCK_STREAM) {
		/* Make the kernel set the EDT when its ctl_sk sends out
		 * rst/ack.
		 */
		err = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &one,
				 sizeof(one));
		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
			goto done;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto done;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto done;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
		goto done;
	}

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto done;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto done;

	rcv_tstamp(conn_fd, payload, sizeof(payload));

done:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
/* Pin the programs of the dtime skeleton and attach them with tc in all
 * three namespaces.
 *
 * ns_src and ns_dst get the ingress_host/egress_host programs on their veth
 * device; ns_fwd gets the prio 100 and prio 101 ingress/egress programs on
 * both veth_dst_fwd and veth_src_fwd.
 * Returns 0 on success, -1 on the first failure.
 */
static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
{
struct nstoken *nstoken;
/* Pin path for a program: "/sys/fs/bpf/<program name>". */
#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
/* Pin skel->progs.<__prog> at PIN_FNAME(<__prog>); jumps to "fail" on error. */
#define PIN(__prog) ({ \
int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
if (!ASSERT_OK(err, "pin " #__prog)) \
goto fail; \
})
/* setup ns_src tc progs */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return -1;
PIN(egress_host);
PIN(ingress_host);
SYS("tc qdisc add dev veth_src clsact");
SYS("tc filter add dev veth_src ingress bpf da object-pinned "
PIN_FNAME(ingress_host));
SYS("tc filter add dev veth_src egress bpf da object-pinned "
PIN_FNAME(egress_host));
close_netns(nstoken);
/* setup ns_dst tc progs */
nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
return -1;
/* The same two programs are pinned again under the same paths, this time
 * from within ns_dst.
 */
PIN(egress_host);
PIN(ingress_host);
SYS("tc qdisc add dev veth_dst clsact");
SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
PIN_FNAME(ingress_host));
SYS("tc filter add dev veth_dst egress bpf da object-pinned "
PIN_FNAME(egress_host));
close_netns(nstoken);
/* setup ns_fwd tc progs */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
return -1;
PIN(ingress_fwdns_prio100);
PIN(egress_fwdns_prio100);
PIN(ingress_fwdns_prio101);
PIN(egress_fwdns_prio101);
SYS("tc qdisc add dev veth_dst_fwd clsact");
SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
PIN_FNAME(ingress_fwdns_prio100));
SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
PIN_FNAME(ingress_fwdns_prio101));
SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
PIN_FNAME(egress_fwdns_prio100));
SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
PIN_FNAME(egress_fwdns_prio101));
SYS("tc qdisc add dev veth_src_fwd clsact");
SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
PIN_FNAME(ingress_fwdns_prio100));
SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
PIN_FNAME(ingress_fwdns_prio101));
SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
PIN_FNAME(egress_fwdns_prio100));
SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
PIN_FNAME(egress_fwdns_prio101));
close_netns(nstoken);
/* NOTE(review): only PIN is undefined here; PIN_FNAME stays defined. */
#undef PIN
return 0;
fail:
/* Only reached from PIN()/SYS(), i.e. while a namespace token is held. */
close_netns(nstoken);
return -1;
}
/* Indexes into the per-test dtimes[] and errs[] counter arrays of the dtime
 * skeleton. SET_DTIME bounds the "counter must be non-zero" loops in the
 * tests and __MAX_CNT bounds the loops over errs[].
 */
enum {
INGRESS_FWDNS_P100,
INGRESS_FWDNS_P101,
EGRESS_FWDNS_P100,
EGRESS_FWDNS_P101,
INGRESS_ENDHOST,
EGRESS_ENDHOST,
SET_DTIME,
__MAX_CNT,
};
const char *cnt_names[] = {
"ingress_fwdns_p100",
"ingress_fwdns_p101",
"egress_fwdns_p100",
"egress_fwdns_p101",
"ingress_endhost",
"egress_endhost",
"set_dtime",
};
/* Identifiers of the dtime test cases. The value is written to
 * skel->bss->test and used as the first index of the dtimes[]/errs[] arrays.
 * UKN_TEST has no entry in test_names[].
 */
enum {
TCP_IP6_CLEAR_DTIME,
TCP_IP4,
TCP_IP6,
UDP_IP4,
UDP_IP6,
TCP_IP4_RT_FWD,
TCP_IP6_RT_FWD,
UDP_IP4_RT_FWD,
UDP_IP6_RT_FWD,
UKN_TEST,
__NR_TESTS,
};
const char *test_names[] = {
"tcp ip6 clear dtime",
"tcp ip4",
"tcp ip6",
"udp ip4",
"udp ip6",
"tcp ip4 rt fwd",
"tcp ip6 rt fwd",
"udp ip4 rt fwd",
"udp ip6 rt fwd",
};
/* Build the assertion label "<test name> <counter name>".
 *
 * The result lives in a static buffer that is overwritten by the next call.
 */
static const char *dtime_cnt_str(int test, int cnt)
{
	static char label[64];

	snprintf(label, sizeof(label), "%s %s",
		 test_names[test], cnt_names[cnt]);

	return label;
}
/* Build the assertion label "<test name> <counter name> errs".
 *
 * The result lives in a static buffer that is overwritten by the next call.
 */
static const char *dtime_err_str(int test, int cnt)
{
	static char label[64];

	snprintf(label, sizeof(label), "%s %s errs",
		 test_names[test], cnt_names[cnt]);

	return label;
}
/* Run the "tcp ip6 clear dtime" case: one IPv6 TCP exchange, followed by
 * checks on which counters the BPF programs incremented and on the absence
 * of recorded errors.
 */
static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
{
	const int t = TCP_IP6_CLEAR_DTIME;
	__u32 *counts = skel->bss->dtimes[t];
	__u32 *errors = skel->bss->errs[t];
	int cnt;

	skel->bss->test = t;
	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0);

	/* Both fwd-ns ingress counters and the prio 101 egress counter are
	 * expected to stay at zero; the remaining ones must have fired.
	 */
	ASSERT_EQ(counts[INGRESS_FWDNS_P100], 0,
		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
	ASSERT_EQ(counts[INGRESS_FWDNS_P101], 0,
		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
	ASSERT_GT(counts[EGRESS_FWDNS_P100], 0,
		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
	ASSERT_EQ(counts[EGRESS_FWDNS_P101], 0,
		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
	ASSERT_GT(counts[EGRESS_ENDHOST], 0,
		  dtime_cnt_str(t, EGRESS_ENDHOST));
	ASSERT_GT(counts[INGRESS_ENDHOST], 0,
		  dtime_cnt_str(t, INGRESS_ENDHOST));

	for (cnt = INGRESS_FWDNS_P100; cnt < __MAX_CNT; cnt++)
		ASSERT_EQ(errors[cnt], 0, dtime_err_str(t, cnt));
}
/* Run one TCP dtime case for the given family.
 *
 * bpf_fwd selects the plain TCP_IP4/TCP_IP6 case, otherwise the *_RT_FWD
 * variant is used. After one TCP exchange the per-test counters and error
 * counters are checked.
 */
static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
{
	const bool is_v4 = family == AF_INET;
	const char *addr = is_v4 ? IP4_DST : IP6_DST;
	__u32 *counts, *errors;
	int cnt, t;

	if (is_v4)
		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
	else
		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;

	counts = skel->bss->dtimes[t];
	errors = skel->bss->errs[t];
	skel->bss->test = t;

	test_inet_dtime(family, SOCK_STREAM, addr, 0);

	/* The fwdns_prio100 prog does not read delivery_time_type, so the
	 * kernel puts the (rcv) timestamp in __sk_buff->tstamp.
	 */
	ASSERT_EQ(counts[INGRESS_FWDNS_P100], 0,
		  dtime_cnt_str(t, INGRESS_FWDNS_P100));

	for (cnt = INGRESS_FWDNS_P101; cnt < SET_DTIME; cnt++)
		ASSERT_GT(counts[cnt], 0, dtime_cnt_str(t, cnt));

	for (cnt = INGRESS_FWDNS_P100; cnt < __MAX_CNT; cnt++)
		ASSERT_EQ(errors[cnt], 0, dtime_err_str(t, cnt));
}
/* Run one UDP dtime case for the given family.
 *
 * bpf_fwd selects the plain UDP_IP4/UDP_IP6 case, otherwise the *_RT_FWD
 * variant is used. After one datagram exchange the per-test counters and
 * error counters are checked.
 */
static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
{
	__u32 *dtimes, *errs;
	const char *addr;
	int i, t;

	if (family == AF_INET) {
		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
		addr = IP4_DST;
	} else {
		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
		addr = IP6_DST;
	}

	dtimes = skel->bss->dtimes[t];
	errs = skel->bss->errs[t];
	skel->bss->test = t;

	test_inet_dtime(family, SOCK_DGRAM, addr, 0);

	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
	/* non mono delivery time is not forwarded; label this check with the
	 * counter it actually inspects so a failure is attributed correctly
	 */
	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
		  dtime_cnt_str(t, INGRESS_FWDNS_P101));

	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));

	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
}
/* Subtest: delivery-time handling across the three namespaces.
 *
 * Loads and attaches the dtime skeleton, then runs the TCP/UDP cases once
 * with forwarding disabled in ns_fwd and once more with it enabled, the
 * latter exercising the kernel ip[6]_forward path.
 */
static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
	struct test_tc_dtime *skel;
	struct nstoken *tok;
	int err;

	skel = test_tc_dtime__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
		return;

	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	err = test_tc_dtime__load(skel);
	if (!ASSERT_OK(err, "test_tc_dtime__load"))
		goto done;

	if (netns_load_dtime_bpf(skel))
		goto done;

	/* First round: forwarding switched off in ns_fwd. */
	tok = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(tok, "setns fwd"))
		goto done;
	err = set_forwarding(false);
	close_netns(tok);
	if (!ASSERT_OK(err, "disable forwarding"))
		goto done;

	test_tcp_clear_dtime(skel);
	test_tcp_dtime(skel, AF_INET, true);
	test_tcp_dtime(skel, AF_INET6, true);
	test_udp_dtime(skel, AF_INET, true);
	test_udp_dtime(skel, AF_INET6, true);

	/* Second round: test the kernel ip[6]_forward path instead of
	 * bpf_redirect_neigh().
	 */
	tok = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(tok, "setns fwd"))
		goto done;
	err = set_forwarding(true);
	close_netns(tok);
	if (!ASSERT_OK(err, "enable forwarding"))
		goto done;

	test_tcp_dtime(skel, AF_INET, false);
	test_tcp_dtime(skel, AF_INET6, false);
	test_udp_dtime(skel, AF_INET, false);
	test_udp_dtime(skel, AF_INET6, false);

done:
	test_tc_dtime__destroy(skel);
}
/* Subtest: redirect using the test_tc_neigh_fib programs.
 *
 * Runs inside ns_fwd: loads the skeleton, pins and attaches its programs,
 * enables forwarding and then checks connectivity.
 */
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
	struct test_tc_neigh_fib *skel = NULL;
	struct nstoken *tok;
	int err;

	tok = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(tok, "setns fwd"))
		return;

	skel = test_tc_neigh_fib__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
		goto done;

	err = test_tc_neigh_fib__load(skel);
	if (!ASSERT_OK(err, "test_tc_neigh_fib__load"))
		goto done;

	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
		goto done;

	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
		goto done;

	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
		goto done;

	if (netns_load_bpf())
		goto done;

	/* bpf_fib_lookup() checks if forwarding is enabled */
	err = set_forwarding(true);
	if (!ASSERT_OK(err, "enable forwarding"))
		goto done;

	test_connectivity();

done:
	if (skel)
		test_tc_neigh_fib__destroy(skel);
	close_netns(tok);
}
/* Subtest: redirect using the test_tc_neigh programs.
 *
 * Runs inside ns_fwd: the skeleton is told the ifindexes of both fwd-side
 * veth devices, its programs are pinned and attached, forwarding is
 * disabled and connectivity is checked.
 */
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
{
	struct test_tc_neigh *skel = NULL;
	struct nstoken *tok;

	tok = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(tok, "setns fwd"))
		return;

	skel = test_tc_neigh__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
		goto done;

	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	if (!ASSERT_OK(test_tc_neigh__load(skel), "test_tc_neigh__load"))
		goto done;

	if (!ASSERT_OK(bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE),
		       "pin " SRC_PROG_PIN_FILE))
		goto done;

	if (!ASSERT_OK(bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE),
		       "pin " CHK_PROG_PIN_FILE))
		goto done;

	if (!ASSERT_OK(bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE),
		       "pin " DST_PROG_PIN_FILE))
		goto done;

	if (netns_load_bpf())
		goto done;

	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
		goto done;

	test_connectivity();

done:
	if (skel)
		test_tc_neigh__destroy(skel);
	close_netns(tok);
}
/* Subtest: redirect using the test_tc_peer programs.
 *
 * Runs inside ns_fwd: the skeleton is told the ifindexes of both fwd-side
 * veth devices, its tc_src/tc_chk/tc_dst programs are pinned and attached,
 * forwarding is disabled and connectivity is checked.
 */
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
{
	struct test_tc_peer *skel;
	struct nstoken *tok;
	int rc;

	tok = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(tok, "setns fwd"))
		return;

	skel = test_tc_peer__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
		goto done;

	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	rc = test_tc_peer__load(skel);
	if (!ASSERT_OK(rc, "test_tc_peer__load"))
		goto done;

	rc = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
	if (!ASSERT_OK(rc, "pin " SRC_PROG_PIN_FILE))
		goto done;

	rc = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
	if (!ASSERT_OK(rc, "pin " CHK_PROG_PIN_FILE))
		goto done;

	rc = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
	if (!ASSERT_OK(rc, "pin " DST_PROG_PIN_FILE))
		goto done;

	if (netns_load_bpf())
		goto done;

	rc = set_forwarding(false);
	if (!ASSERT_OK(rc, "disable forwarding"))
		goto done;

	test_connectivity();

done:
	if (skel)
		test_tc_peer__destroy(skel);
	close_netns(tok);
}
/* Create a TUN device (IFF_TUN | IFF_NO_PI) and bring it up.
 *
 * name is the requested interface name; an empty string leaves the name
 * field of the request zeroed. Names that do not fit into ifr_name are
 * truncated rather than left unterminated.
 * Returns the open /dev/net/tun descriptor, or -1 on failure.
 */
static int tun_open(char *name)
{
	struct ifreq ifr;
	int fd, err;

	fd = open("/dev/net/tun", O_RDWR);
	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
	/* snprintf() always terminates, unlike strncpy() with the full size */
	if (*name)
		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", name);

	err = ioctl(fd, TUNSETIFF, &ifr);
	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
		goto fail;

	/* TUNSETIFF writes the device's actual name back into ifr_name, so use
	 * that instead of the requested (possibly empty or over-long) name.
	 */
	SYS("ip link set dev %s up", ifr.ifr_name);

	return fd;

fail:
	close(fd);
	return -1;
}
/* Direction of a single copy performed by tun_relay_loop(). */
enum {
SRC_TO_TARGET = 0,
TARGET_TO_SRC = 1,
};
/* Relay packets between two descriptors until an error occurs.
 *
 * Each iteration waits until src_fd or target_fd is readable, reads one
 * buffer from the readable side (src_fd takes precedence when both are
 * ready) and writes it to the other side.
 * Never returns on success; returns 1 after logging a select, read or
 * write failure.
 */
static int tun_relay_loop(int src_fd, int target_fd)
{
	fd_set rfds;

	for (;;) {
		char buf[1500];
		int direction, nread, nwrite;

		/* select() rewrites the set, so rebuild it every time */
		FD_ZERO(&rfds);
		FD_SET(src_fd, &rfds);
		FD_SET(target_fd, &rfds);

		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
			log_err("select failed");
			return 1;
		}

		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;

		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
		if (nread < 0) {
			log_err("read failed");
			return 1;
		}

		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
		if (nwrite != nread) {
			log_err("write failed");
			return 1;
		}
	}
}
/* Subtest: test_tc_peer L3 programs with a TUN device as the source-side
 * interface.
 *
 * Creates tun_src in ns_src and tun_fwd in ns_fwd, forks a child that relays
 * packets between the two descriptors, attaches the tc_src_l3/tc_dst_l3/
 * tc_chk programs, reroutes the destination addresses through the tunnel and
 * finally checks connectivity.
 */
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
struct test_tc_peer *skel = NULL;
struct nstoken *nstoken = NULL;
int err;
int tunnel_pid = -1;
/* src_fd is assigned by tun_open() before any path reaches "fail". */
int src_fd, target_fd = -1;
int ifindex;
/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
 * expose the L2 headers encapsulating the IP packet to BPF and hence
 * don't have skb in suitable state for this test. Alternative to TUN/TAP
 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
 * but that requires much more complicated setup.
 */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return;
src_fd = tun_open("tun_src");
if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
goto fail;
close_netns(nstoken);
/* Everything from here on runs inside ns_fwd. */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
goto fail;
target_fd = tun_open("tun_fwd");
if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
goto fail;
/* The child only relays packets between the two TUN descriptors and
 * exits with the loop's return value.
 */
tunnel_pid = fork();
if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
goto fail;
if (tunnel_pid == 0)
exit(tun_relay_loop(src_fd, target_fd));
skel = test_tc_peer__open();
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
goto fail;
ifindex = get_ifindex("tun_fwd");
if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
goto fail;
skel->rodata->IFINDEX_SRC = ifindex;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load"))
goto fail;
err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
goto fail;
err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
goto fail;
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
goto fail;
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
 * towards dst, and "tc_dst" to redirect packets
 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
 */
SYS("tc qdisc add dev tun_fwd clsact");
SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
SRC_PROG_PIN_FILE);
SYS("tc qdisc add dev veth_dst_fwd clsact");
SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
DST_PROG_PIN_FILE);
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
CHK_PROG_PIN_FILE);
/* Setup route and neigh tables */
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
/* Replace the direct veth_src routes to the destination with routes
 * through the tunnel.
 */
SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global");
SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global");
SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail;
test_connectivity();
/* Shared cleanup: reached on success as well as on failure. */
fail:
if (tunnel_pid > 0) {
kill(tunnel_pid, SIGTERM);
waitpid(tunnel_pid, NULL, 0);
}
if (src_fd >= 0)
close(src_fd);
if (target_fd >= 0)
close(target_fd);
if (skel)
test_tc_peer__destroy(skel);
if (nstoken)
close_netns(nstoken);
}
/* Run the subtest test_<name>() if it is selected.
 *
 * The namespaces are added, links and routes are set up, and the subtest is
 * called with the resulting ifindexes. The namespaces are deleted again
 * whenever they were added successfully, even if the link/route setup failed.
 */
#define RUN_TEST(name) \
({ \
struct netns_setup_result setup_result; \
if (test__start_subtest(#name)) \
if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
"setup links and routes")) \
test_ ## name(&setup_result); \
netns_setup_namespaces("delete"); \
} \
})
/* Thread entry point: run every tc_redirect subtest in sequence.
 *
 * Namespaces possibly left behind by an earlier run are deleted first, with
 * failures ignored. arg is unused and the return value is always NULL.
 */
static void *test_tc_redirect_run_tests(void *arg)
{
	/* best-effort removal of stale namespaces */
	netns_setup_namespaces_nofail("delete");

	RUN_TEST(tc_redirect_peer);
	RUN_TEST(tc_redirect_peer_l3);
	RUN_TEST(tc_redirect_neigh);
	RUN_TEST(tc_redirect_neigh_fib);
	RUN_TEST(tc_redirect_dtime);

	return NULL;
}
/* Test entry point: run all subtests on a dedicated thread and wait for it. */
void serial_test_tc_redirect(void)
{
	pthread_t runner;
	int err;

	/* Run the tests in their own thread to isolate the namespace changes
	 * so they do not affect the environment of other tests.
	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
	 */
	err = pthread_create(&runner, NULL, &test_tc_redirect_run_tests, NULL);
	if (!ASSERT_OK(err, "pthread_create"))
		return;

	ASSERT_OK(pthread_join(runner, NULL), "pthread_join");
}