From 451ef36bd229f8aa329cb2258a859b4c636d08ef Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:31:37 +0200 Subject: [PATCH 01/22] ip_tunnels: Add new flow flags field to ip_tunnel_key This commit extends the ip_tunnel_key struct with a new field for the flow flags, to pass them to the route lookups. This new field will be populated and used in subsequent commits. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/f8bfd4983bd06685a59b1e3ba76ca27496f51ef3.1658759380.git.paul@isovalent.com --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 20db95055db3..63fac94f9ace 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -54,6 +54,7 @@ struct ip_tunnel_key { __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; + __u8 flow_flags; }; /* Flags for ip_tunnel_info mode. */ From 7e2fb8bc7ef6c7a63ca95751b90162dece0b3f4c Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:31:58 +0200 Subject: [PATCH 02/22] vxlan: Use ip_tunnel_key flow flags in route lookups Use the new ip_tunnel_key field with the flow flags in the IPv4 route lookups for the encapsulated packet. This will be used by the bpf_skb_set_tunnel_key helper in a subsequent commit. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1ffc95c3d60182fd5ec0cf6602083f8f68afe98f.1658759380.git.paul@isovalent.com --- drivers/net/vxlan/vxlan_core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8b0710b576c2..90811ab851fd 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2243,7 +2243,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device struct vxlan_sock *sock4, struct sk_buff *skb, int oif, u8 tos, __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, - struct dst_cache *dst_cache, + __u8 flow_flags, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); @@ -2270,6 +2270,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device fl4.saddr = *saddr; fl4.fl4_dport = dport; fl4.fl4_sport = sport; + fl4.flowi4_flags = flow_flags; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -2459,7 +2460,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __u8 tos, ttl; + __u8 tos, ttl, flow_flags = 0; int ifindex; int err; u32 flags = vxlan->cfg.flags; @@ -2525,6 +2526,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } dst = &remote_ip; dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; + flow_flags = info->key.flow_flags; vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; @@ -2555,7 +2557,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, dst->sin.sin_addr.s_addr, &local_ip.sin.sin_addr.s_addr, - dst_port, src_port, + dst_port, src_port, flow_flags, dst_cache, info); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -3061,7 +3063,8 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, &info->key.u.ipv4.src, dport, sport, - &info->dst_cache, info); + info->key.flow_flags, &info->dst_cache, + info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); From 861396ac0b47780210b72c4fea359540965a4970 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:32:15 +0200 Subject: [PATCH 03/22] geneve: Use ip_tunnel_key flow flags in route lookups Use the new ip_tunnel_key field with the flow flags in the IPv4 route lookups for the encapsulated packet. This will be used by the bpf_skb_set_tunnel_key helper in the subsequent commit. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/fcc2e0eea01e8ea465a180126366ec20596ba530.1658759380.git.paul@isovalent.com --- drivers/net/geneve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2495a5719e1c..018d365f9deb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->saddr = info->key.u.ipv4.src; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_flags = info->key.flow_flags; tos = info->key.tos; if ((tos == 1) && !geneve->cfg.collect_md) { From b8fff748521c7178b9a7d32b5a34a81cec8396f3 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:32:34 +0200 Subject: [PATCH 04/22] bpf: Set flow flag to allow any source IP in bpf_tunnel_key Commit 26101f5ab6bd ("bpf: Add source ip in "struct bpf_tunnel_key"") added support for getting and setting the outer source IP of encapsulated packets via the bpf_skb_{get,set}_tunnel_key BPF helper. This change allows BPF programs to set any IP address as the source, including for example the IP address of a container running on the same host. In that last case, however, the encapsulated packets are dropped when looking up the route because the source IP address isn't assigned to any interface on the host. To avoid this, we need to set the FLOWI_FLAG_ANYSRC flag. Fixes: 26101f5ab6bd ("bpf: Add source ip in "struct bpf_tunnel_key"") Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/76873d384e21288abe5767551a0799ac93ec07fb.1658759380.git.paul@isovalent.com --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 57c5e4c4efd2..dffc7dcda96a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4653,6 +4653,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); + info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; From 1115169f47ae45eeb04c616c404492bc8268daa0 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:32:53 +0200 Subject: [PATCH 05/22] selftests/bpf: Don't assign outer source IP to host The previous commit fixed a bug in the bpf_skb_set_tunnel_key helper to avoid dropping packets whose outer source IP address isn't assigned to a host interface. This commit changes the corresponding selftest to not assign the outer source IP address to an interface. Not assigning the source IP to an interface causes two issues in the existing test: 1. The ARP requests will fail for that IP address so we need to add the ARP entry manually. 2. The encapsulated ICMP echo reply traffic will not reach the VXLAN device. It will be dropped by the stack before, because the outer destination IP is unknown. To solve 2., we have two choices. Either we perform decapsulation ourselves in a BPF program attached at veth1 (the base device for the VXLAN device), or we switch the outer destination address when we receive the packet at veth1, such that the stack properly demultiplexes it to the VXLAN device afterward. This commit implements the second approach, where we switch the outer destination address from the unassigned IP address to the assigned one, only for VXLAN traffic ingressing veth1. Then, at the vxlan device, the BPF program that checks the output of bpf_skb_get_tunnel_key needs to be updated as the expected local IP address is now the unassigned one. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/4addde76eaf3477a58975bef15ed2788c44e5f55.1658759380.git.paul@isovalent.com --- .../selftests/bpf/prog_tests/test_tunnel.c | 17 +++- .../selftests/bpf/progs/test_tunnel_kern.c | 80 ++++++++++++++++--- 2 files changed, 86 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 3bba4a2a0530..eea274110267 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -82,6 +82,7 @@ #define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00" +#define MAC_VETH1 "52:54:00:d9:03:00" #define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV1 "vxlan11" @@ -108,10 +109,9 @@ static int config_device(void) { SYS("ip netns add at_ns0"); - SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); SYS("ip link set veth0 netns at_ns0"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); SYS("ip link set dev veth1 up mtu 1500"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); @@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ SYS("ip link add dev %s type vxlan external gbp dstport 4789", @@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) goto done; + /* load and attach bpf prog to veth dev tc hook point */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + goto done; + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 17f2f325b3f3..df0673c4ecbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -14,15 +14,24 @@ #include #include #include +#include #include #include #include #include +#include #include #include #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) +#define VXLAN_UDP_PORT 4789 + +/* Only IPv4 address assigned to veth1. + * 172.16.1.200 + */ +#define ASSIGNED_ADDR_VETH1 0xac1001c8 + struct geneve_opt { __be16 opt_class; __u8 type; @@ -33,6 +42,11 @@ struct geneve_opt { __u8 opt_data[8]; /* hard-coded to 8 byte */ }; +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct vxlan_metadata { __u32 gbp; }; @@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 orig_daddr; __u32 index = 0; - __u32 *local_ip = NULL; - - local_ip = bpf_map_lookup_elem(&local_ip_map, &index); - if (!local_ip) { - log_err(ret); - return TC_ACT_SHOT; - } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { @@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_SHOT; } - if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) { bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", key.tunnel_id, key.local_ipv4, key.remote_ipv4, md.gbp); - bpf_printk("local_ip 0x%x\n", *local_ip); log_err(ret); return TC_ACT_SHOT; } @@ -402,6 +409,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("tc") +int veth_set_outer_dst(struct __sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(long)skb->data; + __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1); + void *data_end = (void *)(long)skb->data_end; + struct udphdr *udph; + struct iphdr *iph; + __u32 index = 0; + int ret = 0; + int shrink; + __s64 csum; + + if ((void *)eth + sizeof(*eth) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + iph = (struct iphdr *)(eth + 1); + if ((void *)iph + sizeof(*iph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (iph->protocol != IPPROTO_UDP) + return TC_ACT_OK; + + udph = (struct udphdr *)(iph + 1); + if ((void *)udph + sizeof(*udph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (udph->dest != bpf_htons(VXLAN_UDP_PORT)) + return TC_ACT_OK; + + if (iph->daddr != assigned_ip) { + csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip, + sizeof(__u32), 0); + if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), + &assigned_ip, sizeof(__u32), 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), + 0, csum, 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + bpf_skb_change_type(skb, PACKET_HOST); + } + return TC_ACT_OK; +} + SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { From bbd52178e249fe893ef4a9b87cde5b6c473b0a7c Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 22 Jul 2022 15:01:05 -0700 Subject: [PATCH 06/22] bpf: Fix bpf_xdp_pointer return pointer For the case where offset + len == size, bpf_xdp_pointer should return a valid pointer to the addr because that access is permitted. We should only return NULL in the case where offset + len exceeds size. Fixes: 3f364222d032 ("net: xdp: introduce bpf_xdp_pointer utility routine") Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/bpf/20220722220105.2065466-1-joannelkoong@gmail.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index dffc7dcda96a..5669248aff25 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3918,7 +3918,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) offset -= frag_size; } out: - return offset + len < size ? addr + offset : NULL; + return offset + len <= size ? addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, From bd82ea52f0ee2890b698155a6d7bf9ea5bd8930d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Jul 2022 19:17:10 +0200 Subject: [PATCH 07/22] bpf, devmap: Compute proper xdp_frame len redirecting frames Even if it is currently forbidden to XDP_REDIRECT a multi-frag xdp_frame into a devmap, compute proper xdp_frame length in __xdp_enqueue and is_valid_dst routines running xdp_get_frame_len(). Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/894d99c01139e921bdb6868158ff8e67f661c072.1658596075.git.lorenzo@kernel.org --- kernel/bpf/devmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1400561efb15..a0e02b009487 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -477,7 +477,7 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = xdp_ok_fwd_dev(dev, xdpf->len); + err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf)); if (unlikely(err)) return err; @@ -536,7 +536,7 @@ static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) !obj->dev->netdev_ops->ndo_xdp_xmit) return false; - if (xdp_ok_fwd_dev(obj->dev, xdpf->len)) + if (xdp_ok_fwd_dev(obj->dev, xdp_get_frame_len(xdpf))) return false; return true; From 2d369b4b0044d4f6a3fa4071807f66eced0f2725 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Jul 2022 15:40:07 +0200 Subject: [PATCH 08/22] libbpf: Extend BPF_KSYSCALL documentation Explicitly list known quirks. Mention that socket-related syscalls can be invoked via socketcall(). Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220726134008.256968-2-iii@linux.ibm.com --- tools/lib/bpf/bpf_tracing.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f4d3e1e2abe2..43ca3aff2292 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -523,10 +523,17 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) * Original struct pt_regs * context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). * - * At the moment BPF_KSYSCALL does not handle all the calling convention - * quirks for mmap(), clone() and compat syscalls transparrently. This may or - * may not change in the future. User needs to take extra measures to handle - * such quirks explicitly, if necessary. + * At the moment BPF_KSYSCALL does not transparently handle all the calling + * convention quirks for the following syscalls: + * + * - mmap(): __ARCH_WANT_SYS_OLD_MMAP. + * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and + * CONFIG_CLONE_BACKWARDS3. + * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL. + * - compat syscalls. + * + * This may or may not change in the future. User needs to take extra measures + * to handle such quirks explicitly, if necessary. * * This macro relies on BPF CO-RE support and virtual __kconfig externs. */ From d295daf505758f9a0e4d05f4ee3bfdfb4192c18f Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Jul 2022 15:40:08 +0200 Subject: [PATCH 09/22] selftests/bpf: Attach to socketcall() in test_probe_user test_probe_user fails on architectures where libc uses socketcall(SYS_CONNECT) instead of connect(). Fix by attaching to socketcall as well. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220726134008.256968-3-iii@linux.ibm.com --- .../selftests/bpf/prog_tests/probe_user.c | 35 +++++++++++++------ .../selftests/bpf/progs/test_probe_user.c | 29 +++++++++++++-- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index abf890d066eb..34dbd2adc157 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -4,25 +4,35 @@ /* TODO: corrupts other tests uses connect() */ void serial_test_probe_user(void) { - const char *prog_name = "handle_sys_connect"; + static const char *const prog_names[] = { + "handle_sys_connect", +#if defined(__s390x__) + "handle_sys_socketcall", +#endif + }; + enum { prog_count = ARRAY_SIZE(prog_names) }; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; struct sockaddr curr, orig, tmp; struct sockaddr_in *in = (struct sockaddr_in *)&curr; - struct bpf_link *kprobe_link = NULL; - struct bpf_program *kprobe_prog; + struct bpf_link *kprobe_links[prog_count] = {}; + struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + size_t i; obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", prog_name)) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_progs[i] = + bpf_object__find_program_by_name(obj, prog_names[i]); + if (CHECK(!kprobe_progs[i], "find_probe", + "prog '%s' not found\n", prog_names[i])) + goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -33,9 +43,11 @@ void serial_test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach(kprobe_prog); - if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_links[i] = bpf_program__attach(kprobe_progs[i]); + if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe")) + goto cleanup; + } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; @@ -69,6 +81,7 @@ void serial_test_probe_user(void) inet_ntoa(in->sin_addr), ntohs(in->sin_port))) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); + for (i = 0; i < prog_count; i++) + bpf_link__destroy(kprobe_links[i]); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8e1495008e4d..a8e501af9604 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,8 +7,7 @@ static struct sockaddr_in old; -SEC("ksyscall/connect") -int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) +static int handle_sys_connect_common(struct sockaddr_in *uservaddr) { struct sockaddr_in new; @@ -19,4 +18,30 @@ int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int return 0; } +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, + int addrlen) +{ + return handle_sys_connect_common(uservaddr); +} + +#if defined(bpf_target_s390) +#ifndef SYS_CONNECT +#define SYS_CONNECT 3 +#endif + +SEC("ksyscall/socketcall") +int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args) +{ + if (call == SYS_CONNECT) { + struct sockaddr_in *uservaddr; + + bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]); + return handle_sys_connect_common(uservaddr); + } + + return 0; +} +#endif + char _license[] SEC("license") = "GPL"; From aee993bbd05cede097885fa74e2627a458d3418a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 27 Jul 2022 00:11:54 +0000 Subject: [PATCH 10/22] selftests/bpf: Sort configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes sure to sort the existing minimal kernel configuration containing options required for running BPF selftests alphabetically. Doing so will make it easier to diff it against other configurations, which in turn helps with maintaining disjunct config files that build on top of each other. It also helped identify the CONFIG_IPV6_GRE being set twice and removes one of the occurrences. Lastly, we change NET_CLS_BPF from 'm' to 'y'. Having this option as 'm' will cause failures of the btf_skc_cls_ingress selftest. Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-2-deso@posteo.net --- tools/testing/selftests/bpf/config | 107 ++++++++++++++--------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c05904d631ec..fabf0c014349 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,65 +1,64 @@ +CONFIG_BLK_DEV_LOOP=y CONFIG_BPF=y -CONFIG_BPF_SYSCALL=y -CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y -CONFIG_TEST_BPF=m +CONFIG_BPF_JIT=y +CONFIG_BPF_LIRC_MODE2=y +CONFIG_BPF_LSM=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NETDEVSIM=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_INGRESS=y -CONFIG_NET_IPIP=y -CONFIG_IPV6=y -CONFIG_NET_IPGRE_DEMUX=y -CONFIG_NET_IPGRE=y -CONFIG_IPV6_GRE=y -CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m -CONFIG_VXLAN=y -CONFIG_GENEVE=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_LWTUNNEL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_XDP_SOCKETS=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_IPV6_TUNNEL=y -CONFIG_IPV6_GRE=y -CONFIG_IPV6_SEG6_BPF=y -CONFIG_NET_FOU=m -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_MPLS=y -CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_IPV6_SIT=m -CONFIG_BPF_JIT=y -CONFIG_BPF_LSM=y -CONFIG_SECURITY=y -CONFIG_RC_CORE=y -CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y -CONFIG_IMA=y -CONFIG_SECURITYFS=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_READ_POLICY=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_FUNCTION_TRACER=y +CONFIG_CRYPTO_USER_API_HASH=m CONFIG_DYNAMIC_FTRACE=y -CONFIG_NETFILTER=y -CONFIG_NF_DEFRAG_IPV4=y -CONFIG_NF_DEFRAG_IPV6=y -CONFIG_NF_CONNTRACK=y -CONFIG_USERFAULTFD=y CONFIG_FPROBE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_GENEVE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_MPTCP=y -CONFIG_NETFILTER_SYNPROXY=y -CONFIG_NETFILTER_XT_TARGET_CT=y -CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_IMA=y +CONFIG_IMA_READ_POLICY=y +CONFIG_IMA_WRITE_POLICY=y CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IPV6=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m +CONFIG_IPV6_GRE=y +CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=y +CONFIG_LIRC=y +CONFIG_LWTUNNEL=y +CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPTCP=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_FOU=m +CONFIG_NET_FOU_IP_TUNNELS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y +CONFIG_NET_MPLS_GSO=m +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCHED=y +CONFIG_NETDEVSIM=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_RC_CORE=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_TEST_BPF=m +CONFIG_USERFAULTFD=y +CONFIG_VXLAN=y +CONFIG_XDP_SOCKETS=y From cbd620fc18cad51500c46e222328ca60adaa4644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 27 Jul 2022 00:11:55 +0000 Subject: [PATCH 11/22] selftests/bpf: Copy over libbpf configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change integrates libbpf maintained configurations and black/white lists [0] into the repository, co-located with the BPF selftests themselves. We minimize the kernel configurations to keep future updates as small as possible [1]. Furthermore, we make both kernel configurations build on top of the existing configuration tools/testing/selftests/bpf/config (to be concatenated before build). Lastly, we replaced the terms blacklist & whitelist with denylist and allowlist, respectively. [0] https://github.com/libbpf/libbpf/tree/20f03302350a4143825cedcbd210c4d7112c1898/travis-ci/vmtest/configs [1] https://lore.kernel.org/bpf/20220712212124.3180314-1-deso@posteo.net/T/#m30a53648352ed494e556ac003042a9ad0a8f98c6 Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-3-deso@posteo.net --- tools/testing/selftests/bpf/DENYLIST | 6 + tools/testing/selftests/bpf/DENYLIST.s390x | 67 ++++++ tools/testing/selftests/bpf/config.s390x | 147 ++++++++++++ tools/testing/selftests/bpf/config.x86_64 | 251 +++++++++++++++++++++ 4 files changed, 471 insertions(+) create mode 100644 tools/testing/selftests/bpf/DENYLIST create mode 100644 tools/testing/selftests/bpf/DENYLIST.s390x create mode 100644 tools/testing/selftests/bpf/config.s390x create mode 100644 tools/testing/selftests/bpf/config.x86_64 diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST new file mode 100644 index 000000000000..939de574fc7f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp +varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x new file mode 100644 index 000000000000..e33cab34d22f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -0,0 +1,67 @@ +# TEMPORARY +atomics # attach(add): actual -524 <= expected 0 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) +bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_loop # attaches to __x64_sys_nanosleep +bpf_mod_race # BPF trampoline +bpf_nf # JIT does not support calling kernel function +core_read_macros # unknown func bpf_probe_read#4 (overlapping) +d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) +fentry_fexit # fentry attach failed: -524 (trampoline) +fentry_test # fentry_first_attach unexpected error: -524 (trampoline) +fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) +fexit_test # fexit_first_attach unexpected error: -524 (trampoline) +get_func_args_test # trampoline +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) +get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) +ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) +ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +modify_return # modify_return attach failed: -524 (trampoline) +module_attach # skel_attach skeleton attach failed: -524 (trampoline) +mptcp +kprobe_multi_test # relies on fentry +netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) +probe_user # check_kprobe_res wrong kprobe res from probe read (?) +recursion # skel_attach unexpected error: -524 (trampoline) +ringbuf # skel_load skeleton load failed (?) +sk_assign # Can't read on server: Invalid argument (?) +sk_lookup # endianness problem +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) +socket_cookie # prog_attach unexpected error: -524 (trampoline) +stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) +test_bpffs # bpffs test failed 255 (iterator) +test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) +test_ima # failed to auto-attach program 'ima': -524 (trampoline) +test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) +test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_overhead # attach_fentry unexpected error: -524 (trampoline) +test_profiler # unknown func bpf_probe_read_str#45 (overlapping) +timer # failed to auto-attach program 'test1': -524 (trampoline) +timer_crash # trampoline +timer_mim # failed to auto-attach program 'test1': -524 (trampoline) +trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) +trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) +trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) +xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) +xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +map_kptr # failed to open_and_load program: -524 (trampoline) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) +send_signal # intermittently fails to receive signal +select_reuseport # intermittently fails on new s390x setup +xdp_synproxy # JIT does not support calling kernel function (kfunc) +unpriv_bpf_disabled # fentry diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x new file mode 100644 index 000000000000..f8a7a258a718 --- /dev/null +++ b/tools/testing/selftests/bpf/config.s390x @@ -0,0 +1,147 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FANOTIFY=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_MARCH_Z10_FEATURES=y +CONFIG_HAVE_MARCH_Z196_FEATURES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MARCH_Z196=y +CONFIG_MARCH_Z196_TUNE=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NF_TABLES=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITY_NETWORK=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 new file mode 100644 index 000000000000..f0859a1d37ab --- /dev/null +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -0,0 +1,251 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_AGP_SIS=y +CONFIG_AGP_VIA=y +CONFIG_AMIGA_PARTITION=y +CONFIG_AUDIT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KYE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETLABEL=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA=y +CONFIG_NUMA_BALANCING=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XDP_SOCKETS_DIAG=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y From 40b09653b1977c9630f24c9ca17322d5b38f1ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 27 Jul 2022 00:11:56 +0000 Subject: [PATCH 12/22] selftests/bpf: Adjust vmtest.sh to use local kernel configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far the vmtest.sh script, which can be used as a convenient way to run bpf selftests, has obtained the kernel config safe to use for testing from the libbpf/libbpf GitHub repository [0]. Given that we now have included this configuration into this very repository, we can just consume it from here as well, eliminating the necessity of remote accesses. With this change we adjust the logic in the script to use the configuration from below tools/testing/selftests/bpf/configs/ instead of pulling it over the network. [0] https://github.com/libbpf/libbpf Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-4-deso@posteo.net --- tools/testing/selftests/bpf/vmtest.sh | 53 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index e0bb04a97e10..b86ae4a2e5c5 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" @@ -269,26 +268,42 @@ is_rel_path() [[ ${path:0:1} != "/" ]] } +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + update_kconfig() { - local kconfig_file="$1" - local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" - # Github does not return the "last-modified" header when retrieving the - # raw contents of the file. Use the API call to get the last-modified - # time of the kernel config and only update the config if it has been - # updated after the previously cached config was created. This avoids - # unnecessarily compiling the kernel and selftests. - if [[ -f "${kconfig_file}" ]]; then - local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ - grep "last-modified" | awk -F ': ' '{print $2}')" - local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" - local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + local kernel_checkout="$1" + local kconfig_file="$2" - if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then - ${update_command} - fi + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. + break + fi + done else - ${update_command} + do_update_kconfig "$kernel_checkout" "$kconfig_file" fi } @@ -372,7 +387,7 @@ main() mkdir -p "${OUTPUT_DIR}" mkdir -p "${mount_dir}" - update_kconfig "${kconfig_file}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" From 64893e83f916145fd23182209009e9d2ce36d2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Thu, 28 Jul 2022 22:23:45 +0000 Subject: [PATCH 13/22] libbpf: Support PPC in arch_specific_syscall_pfx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 708ac5bea0ce ("libbpf: add ksyscall/kretsyscall sections support for syscall kprobes") added the arch_specific_syscall_pfx() function, which returns a string representing the architecture in use. As it turns out this function is currently not aware of Power PC, where NULL is returned. That's being flagged by the libbpf CI system, which builds for ppc64le and the compiler sees a NULL pointer being passed in to a %s format string. With this change we add representations for two more architectures, for Power PC and Power PC 64, and also adjust the string format logic to handle NULL pointers gracefully, in an attempt to prevent similar issues with other architectures in the future. Fixes: 708ac5bea0ce ("libbpf: add ksyscall/kretsyscall sections support for syscall kprobes") Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220728222345.3125975-1-deso@posteo.net --- tools/lib/bpf/libbpf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b01fe01b0761..50d41815f431 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9995,6 +9995,10 @@ static const char *arch_specific_syscall_pfx(void) return "mips"; #elif defined(__riscv) return "riscv"; +#elif defined(__powerpc__) + return "powerpc"; +#elif defined(__powerpc64__) + return "powerpc64"; #else return NULL; #endif @@ -10127,8 +10131,13 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + /* arch_specific_syscall_pfx() should never return NULL here + * because it is guarded by kernel_supports(). However, since + * compiler does not know that we have an explicit conditional + * as well. + */ snprintf(func_name, sizeof(func_name), "__%s_sys_%s", - arch_specific_syscall_pfx(), syscall_name); + arch_specific_syscall_pfx() ? : "", syscall_name); } else { snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); } From 58250ae350de8d28ce91ade4605d32c9e7f062a8 Mon Sep 17 00:00:00 2001 From: Fedor Tokarev Date: Mon, 11 Jul 2022 23:13:17 +0200 Subject: [PATCH 14/22] bpf: btf: Fix vsnprintf return value check vsnprintf returns the number of characters which would have been written if enough space had been available, excluding the terminating null byte. Thus, the return value of 'len_left' means that the last character has been dropped. Signed-off-by: Fedor Tokarev Signed-off-by: Andrii Nakryiko Acked-by: Alan Maguire Link: https://lore.kernel.org/bpf/20220711211317.GA1143610@laptop --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7ac971ea98d1..7e64447659f3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6643,7 +6643,7 @@ static void btf_snprintf_show(struct btf_show *show, const char *fmt, if (len < 0) { ssnprintf->len_left = 0; ssnprintf->len = len; - } else if (len > ssnprintf->len_left) { + } else if (len >= ssnprintf->len_left) { /* no space, drive on to get length we would have written */ ssnprintf->len_left = 0; ssnprintf->len += len; From 5eff8c18f124e47da0be79fb2b776e1c2cac0b07 Mon Sep 17 00:00:00 2001 From: Rongguang Wei Date: Tue, 26 Jul 2022 17:30:45 +0800 Subject: [PATCH 15/22] bpftool: Replace sizeof(arr)/sizeof(arr[0]) with ARRAY_SIZE macro Use the ARRAY_SIZE macro and make the code more compact. Signed-off-by: Rongguang Wei Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220726093045.3374026-1-clementwei90@163.com --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f081de398b60..c81362a001ba 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1962,7 +1962,7 @@ static int profile_parse_metrics(int argc, char **argv) int selected_cnt = 0; unsigned int i; - metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + metric_cnt = ARRAY_SIZE(metrics); while (argc > 0) { for (i = 0; i < metric_cnt; i++) { From a6df06744b2d0b953615e0d6ca8b5e84ae4019fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Thorben=20Hinz?= Date: Tue, 26 Jul 2022 15:32:03 +0200 Subject: [PATCH 16/22] bpftool: Don't try to return value from void function in skeleton MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A skeleton generated by bpftool previously contained a return followed by an expression in OBJ_NAME__detach(), which has return type void. This did not hurt, the bpf_object__detach_skeleton() called there returns void itself anyway, but led to a warning when compiling with e.g. -pedantic. Signed-off-by: Jörn-Thorben Hinz Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220726133203.514087-1-jthinz@mailbox.tu-berlin.de --- tools/bpf/bpftool/gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1cf53bb01936..7070dcffa822 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1175,7 +1175,7 @@ static int do_skeleton(int argc, char **argv) static inline void \n\ %1$s__detach(struct %1$s *obj) \n\ { \n\ - return bpf_object__detach_skeleton(obj->skeleton); \n\ + bpf_object__detach_skeleton(obj->skeleton); \n\ } \n\ ", obj_name From 639de43ef0dda165441af400ecb372e16b7f9354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Wed, 27 Jul 2022 18:29:55 +0000 Subject: [PATCH 17/22] selftests/bpf: Bump internal send_signal/send_signal_tracepoint timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The send_signal/send_signal_tracepoint is pretty flaky, with at least one failure in every ten runs on a few attempts I've tried it: > test_send_signal_common:PASS:pipe_c2p 0 nsec > test_send_signal_common:PASS:pipe_p2c 0 nsec > test_send_signal_common:PASS:fork 0 nsec > test_send_signal_common:PASS:skel_open_and_load 0 nsec > test_send_signal_common:PASS:skel_attach 0 nsec > test_send_signal_common:PASS:pipe_read 0 nsec > test_send_signal_common:PASS:pipe_write 0 nsec > test_send_signal_common:PASS:reading pipe 0 nsec > test_send_signal_common:PASS:reading pipe error: size 0 0 nsec > test_send_signal_common:FAIL:incorrect result unexpected incorrect result: actual 48 != expected 50 > test_send_signal_common:PASS:pipe_write 0 nsec > #139/1 send_signal/send_signal_tracepoint:FAIL The reason does not appear to be a correctness issue in the strict sense. Rather, we merely do not receive the signal we are waiting for within the provided timeout. Let's bump the timeout by a factor of ten. With that change I have not been able to reproduce the failure in 150+ iterations. I am also sneaking in a small simplification to the test_progs test selection logic. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220727182955.4044988-1-deso@posteo.net --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d71226e34c34..d63a20fbed33 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 100000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) j /= i + j + 1; buf[0] = sigusr1_received ? '2' : '0'; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c639f2e56fc5..3561c97701f2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1604,11 +1604,8 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; test->test_num = i + 1; - if (should_run(&env.test_selector, - test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; + test->should_run = should_run(&env.test_selector, + test->test_num, test->test_name); if ((test->run_test == NULL && test->run_serial_test == NULL) || (test->run_test != NULL && test->run_serial_test != NULL)) { From dc81f8d1e8ea3f5dfa88919cb834a135a6a536b8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 29 Jul 2022 12:41:06 -0700 Subject: [PATCH 18/22] bpf: Fix test_progs -j error with fentry/fexit tests When multiple threads are attaching/detaching fentry/fexit programs to the same trampoline, we may call register_fentry on the same trampoline twice: register_fentry(), unregister_fentry(), then register_fentry again. This causes ftrace_set_filter_ip() for the same ip on tr->fops twice, which leaves duplicated ip in tr->fops. The extra ip is not cleaned up properly on unregister and thus causes failures with further register in register_ftrace_direct_multi(): register_ftrace_direct_multi() { ... for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { if (ftrace_find_rec_direct(entry->ip)) goto out_unlock; } } ... } This can be triggered with parallel fentry/fexit tests with test_progs: ./test_progs -t fentry,fexit -j Fix this by resetting tr->fops in ftrace_set_filter_ip(), so that there will never be duplicated entries in tr->fops. Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Reported-by: Andrii Nakryiko Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220729194106.1207472-1-song@kernel.org --- kernel/bpf/trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 42e387a12694..7ec7e23559ad 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -255,7 +255,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) return -ENOENT; if (tr->func.ftrace_managed) { - ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); From 3b317abc71598bda8ff9a9c483ad8ae167b18382 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 28 Jul 2022 07:40:48 -0400 Subject: [PATCH 19/22] bpf: Fix NULL pointer dereference when registering bpf trampoline A panic was reported on arm64: [ 44.517109] audit: type=1334 audit(1658859870.268:59): prog-id=19 op=LOAD [ 44.622031] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 [ 44.624321] Mem abort info: [ 44.625049] ESR = 0x0000000096000004 [ 44.625935] EC = 0x25: DABT (current EL), IL = 32 bits [ 44.627182] SET = 0, FnV = 0 [ 44.627930] EA = 0, S1PTW = 0 [ 44.628684] FSC = 0x04: level 0 translation fault [ 44.629788] Data abort info: [ 44.630474] ISV = 0, ISS = 0x00000004 [ 44.631362] CM = 0, WnR = 0 [ 44.632041] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000100ab5000 [ 44.633494] [0000000000000010] pgd=0000000000000000, p4d=0000000000000000 [ 44.635202] Internal error: Oops: 96000004 [#1] SMP [ 44.636452] Modules linked in: xfs crct10dif_ce ghash_ce virtio_blk virtio_console virtio_mmio qemu_fw_cfg [ 44.638713] CPU: 2 PID: 1 Comm: systemd Not tainted 5.19.0-rc7 #1 [ 44.640164] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 44.641799] pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 44.643404] pc : ftrace_set_filter_ip+0x24/0xa0 [ 44.644659] lr : bpf_trampoline_update.constprop.0+0x428/0x4a0 [ 44.646118] sp : ffff80000803b9f0 [ 44.646950] x29: ffff80000803b9f0 x28: ffff0b5d80364400 x27: ffff80000803bb48 [ 44.648721] x26: ffff8000085ad000 x25: ffff0b5d809d2400 x24: 0000000000000000 [ 44.650493] x23: 00000000ffffffed x22: ffff0b5dd7ea0900 x21: 0000000000000000 [ 44.652279] x20: 0000000000000000 x19: 0000000000000000 x18: ffffffffffffffff [ 44.654067] x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff [ 44.655787] x14: ffff0b5d809d2498 x13: ffff0b5d809d2432 x12: 0000000005f5e100 [ 44.657535] x11: abcc77118461cefd x10: 000000000000005f x9 : ffffa7219cb5b190 [ 44.659254] x8 : ffffa7219c8e0000 x7 : 0000000000000000 x6 : ffffa7219db075e0 [ 44.661066] x5 : ffffa7219d3130e0 x4 : ffffa7219cab9da0 x3 : 0000000000000000 [ 44.662837] x2 : 0000000000000000 x1 : ffffa7219cb7a5c0 x0 : 0000000000000000 [ 44.664675] Call trace: [ 44.665274] ftrace_set_filter_ip+0x24/0xa0 [ 44.666327] bpf_trampoline_update.constprop.0+0x428/0x4a0 [ 44.667696] __bpf_trampoline_link_prog+0xcc/0x1c0 [ 44.668834] bpf_trampoline_link_prog+0x40/0x64 [ 44.669919] bpf_tracing_prog_attach+0x120/0x490 [ 44.671011] link_create+0xe0/0x2b0 [ 44.671869] __sys_bpf+0x484/0xd30 [ 44.672706] __arm64_sys_bpf+0x30/0x40 [ 44.673678] invoke_syscall+0x78/0x100 [ 44.674623] el0_svc_common.constprop.0+0x4c/0xf4 [ 44.675783] do_el0_svc+0x38/0x4c [ 44.676624] el0_svc+0x34/0x100 [ 44.677429] el0t_64_sync_handler+0x11c/0x150 [ 44.678532] el0t_64_sync+0x190/0x194 [ 44.679439] Code: 2a0203f4 f90013f5 2a0303f5 f9001fe1 (f9400800) [ 44.680959] ---[ end trace 0000000000000000 ]--- [ 44.682111] Kernel panic - not syncing: Oops: Fatal exception [ 44.683488] SMP: stopping secondary CPUs [ 44.684551] Kernel Offset: 0x2721948e0000 from 0xffff800008000000 [ 44.686095] PHYS_OFFSET: 0xfffff4a380000000 [ 44.687144] CPU features: 0x010,00022811,19001080 [ 44.688308] Memory Limit: none [ 44.689082] ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- It's caused by a NULL tr->fops passed to ftrace_set_filter_ip(). tr->fops is initialized to NULL and is assigned to an allocated memory address if CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS is enabled. Since there is no direct call on arm64 yet, the config can't be enabled. To fix it, call ftrace_set_filter_ip() only if tr->fops is not NULL. Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Reported-by: Bruno Goncalves Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Tested-by: Bruno Goncalves Acked-by: Song Liu Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220728114048.3540461-1-xukuohai@huaweicloud.com --- kernel/bpf/trampoline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 7ec7e23559ad..c122d8b3ddc9 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -248,8 +248,11 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) int ret; faddr = ftrace_location((unsigned long)ip); - if (faddr) + if (faddr) { + if (!tr->fops) + return -ENOTSUPP; tr->func.ftrace_managed = true; + } if (bpf_trampoline_module_get(tr)) return -ENOENT; From d0b80a9edb1a029ff913e81b47540e57ad034329 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Jul 2022 14:36:05 -0700 Subject: [PATCH 20/22] netdevsim: Avoid allocation warnings triggered from user space We need to suppress warnings from sily map sizes. Also switch from GFP_USER to GFP_KERNEL_ACCOUNT, I'm pretty sure I misunderstood the flags when writing this code. Fixes: 395cacb5f1a0 ("netdevsim: bpf: support fake map offload") Reported-by: syzbot+ad24705d3fd6463b18c6@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220726213605.154204-1-kuba@kernel.org --- drivers/net/netdevsim/bpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index a43820212932..50854265864d 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -351,10 +351,12 @@ nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; - nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + nmap->entry[idx].key = kmalloc(offmap->map.key_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; - nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); + nmap->entry[idx].value = kmalloc(offmap->map.value_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; @@ -496,7 +498,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) if (offmap->map.map_flags) return -EINVAL; - nmap = kzalloc(sizeof(*nmap), GFP_USER); + nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; From 395fc4fa33e98944384d0d6c8b3ecd9fb4b50d00 Mon Sep 17 00:00:00 2001 From: Joe Burton Date: Fri, 29 Jul 2022 20:27:27 +0000 Subject: [PATCH 21/22] libbpf: Add bpf_obj_get_opts() Add an extensible variant of bpf_obj_get() capable of setting the `file_flags` parameter. This parameter is needed to enable unprivileged access to BPF maps. Without a method like this, users must manually make the syscall. Signed-off-by: Joe Burton Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220729202727.3311806-1-jevburton.kernel@gmail.com --- tools/lib/bpf/bpf.c | 9 +++++++++ tools/lib/bpf/bpf.h | 11 +++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 21 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5eb0df90eb2b..efcc06dafbd9 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -578,12 +578,21 @@ int bpf_obj_pin(int fd, const char *pathname) } int bpf_obj_get(const char *pathname) +{ + return bpf_obj_get_opts(pathname, NULL); +} + +int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) { union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_obj_get_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 88a7cc4bd76f..9c50beabdd14 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -270,8 +270,19 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); +struct bpf_obj_get_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + + size_t :0; +}; +#define bpf_obj_get_opts__last_field file_flags + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_obj_get_opts(const char *pathname, + const struct bpf_obj_get_opts *opts); struct bpf_prog_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0625adb9e888..119e6e1ea7f1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -355,6 +355,7 @@ LIBBPF_0.8.0 { LIBBPF_1.0.0 { global: + bpf_obj_get_opts; bpf_prog_query_opts; bpf_program__attach_ksyscall; btf__add_enum64; From 14250fa4839b3a48c979e7faaf4cbcce619d02bd Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 26 Jul 2022 06:27:33 +0800 Subject: [PATCH 22/22] bpf: Remove unneeded semicolon Eliminate the following coccicheck warning: /kernel/bpf/trampoline.c:101:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220725222733.55613-1-yang.lee@linux.alibaba.com --- kernel/bpf/trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index c122d8b3ddc9..0f532e6a717f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -98,7 +98,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd default: ret = -EINVAL; break; - }; + } mutex_unlock(&tr->mutex); return ret;