mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-15 15:15:47 +00:00
2dbb9b9e6d
This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY. Like other non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN. BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern" to store the bpf context instead of using the skb->cb[48]. At the SO_REUSEPORT sk lookup time, it is in the middle of transiting from a lower layer (ipv4/ipv6) to a upper layer (udp/tcp). At this point, it is not always clear where the bpf context can be appended in the skb->cb[48] to avoid saving-and-restoring cb[]. Even putting aside the difference between ipv4-vs-ipv6 and udp-vs-tcp. It is not clear if the lower layer is only ipv4 and ipv6 in the future and will it not touch the cb[] again before transiting to the upper layer. For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB instead of IP[6]CB and it may still modify the cb[] after calling the udp[46]_lib_lookup_skb(). Because of the above reason, if sk->cb is used for the bpf ctx, saving-and-restoring is needed and likely the whole 48 bytes cb[] has to be saved and restored. Instead of saving, setting and restoring the cb[], this patch opts to create a new "struct sk_reuseport_kern" and setting the needed values in there. The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)" will serve all ipv4/ipv6 + udp/tcp combinations. There is no protocol specific usage at this point and it is also inline with the current sock_reuseport.c implementation (i.e. no protocol specific requirement). In "struct sk_reuseport_md", this patch exposes data/data_end/len with semantic similar to other existing usages. Together with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()", the bpf prog can peek anywhere in the skb. The "bind_inany" tells the bpf prog that the reuseport group is bind-ed to a local INANY address which cannot be learned from skb. The new "bind_inany" is added to "struct sock_reuseport" which will be used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order to avoid repeating the "bind INANY" test on "sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run. It can only be properly initialized when a "sk->sk_reuseport" enabled sk is adding to a hashtable (i.e. during "reuseport_alloc()" and "reuseport_add_sock()"). The new "sk_select_reuseport()" is the main helper that the bpf prog will use to select a SO_REUSEPORT sk. It is the only function that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY. As mentioned in the earlier patch, the validity of a selected sk is checked in run time in "sk_select_reuseport()". Doing the check in verification time is difficult and inflexible (consider the map-in-map use case). The runtime check is to compare the selected sk's reuseport_id with the reuseport_id that we want. This helper will return -EXXX if the selected sk cannot serve the incoming request (e.g. reuseport_id not match). The bpf prog can decide if it wants to do SK_DROP as its discretion. When the bpf prog returns SK_PASS, the kernel will check if a valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL"). If it does , it will use the selected sk. If not, the kernel will select one from "reuse->socks[]" (as before this patch). The SK_DROP and SK_PASS handling logic will be in the next patch. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
69 lines
2.6 KiB
C
69 lines
2.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* internal file - do not include directly */
|
|
|
|
#ifdef CONFIG_NET
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
|
|
#endif
|
|
#ifdef CONFIG_BPF_EVENTS
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
|
|
#endif
|
|
#ifdef CONFIG_CGROUP_BPF
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
|
#endif
|
|
#ifdef CONFIG_BPF_LIRC_MODE2
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
|
|
#endif
|
|
#ifdef CONFIG_INET
|
|
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
|
|
#endif
|
|
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
|
|
#ifdef CONFIG_CGROUPS
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
|
|
#endif
|
|
#ifdef CONFIG_CGROUP_BPF
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
|
|
#endif
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
|
|
#endif
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
|
|
#ifdef CONFIG_NET
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
|
|
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
|
|
#endif
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
|
|
#if defined(CONFIG_XDP_SOCKETS)
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
|
|
#endif
|
|
#ifdef CONFIG_INET
|
|
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
|
|
#endif
|
|
#endif
|