// SPDX-License-Identifier: GPL-2.0 /* XDP sockets * * AF_XDP sockets allows a channel between XDP programs and userspace * applications. * Copyright(c) 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * Author(s): Björn Töpel * Magnus Karlsson */ #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__ #include #include #include #include #include #include #include #include #include #include #include #include #include "xsk_queue.h" #include "xdp_umem.h" static struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; } static int xsk_init_queue(u32 entries, struct xsk_queue **queue, bool umem_queue) { struct xsk_queue *q; if (entries == 0 || *queue || !is_power_of_2(entries)) return -EINVAL; q = xskq_create(entries, umem_queue); if (!q) return -ENOMEM; *queue = q; return 0; } static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; if (!sk) return 0; net = sock_net(sk); local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); sock_orphan(sk); sock->sk = NULL; sk_refcnt_debug_release(sk); sock_put(sk); return 0; } static int xsk_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); int err; if (level != SOL_XDP) return -ENOPROTOOPT; switch (optname) { case XDP_RX_RING: { struct xsk_queue **q; int entries; if (optlen < sizeof(entries)) return -EINVAL; if (copy_from_user(&entries, optval, sizeof(entries))) return -EFAULT; mutex_lock(&xs->mutex); q = &xs->rx; err = xsk_init_queue(entries, q, false); mutex_unlock(&xs->mutex); return err; } case XDP_UMEM_REG: { struct xdp_umem_reg mr; struct xdp_umem *umem; if (xs->umem) return -EBUSY; if (copy_from_user(&mr, optval, sizeof(mr))) return -EFAULT; mutex_lock(&xs->mutex); err = xdp_umem_create(&umem); err = xdp_umem_reg(umem, &mr); if (err) { kfree(umem); mutex_unlock(&xs->mutex); return err; } /* Make sure umem is ready before it can be seen by others */ smp_wmb(); xs->umem = umem; mutex_unlock(&xs->mutex); return 0; } case XDP_UMEM_FILL_RING: { struct xsk_queue **q; int entries; if (!xs->umem) return -EINVAL; if (copy_from_user(&entries, optval, sizeof(entries))) return -EFAULT; mutex_lock(&xs->mutex); q = &xs->umem->fq; err = xsk_init_queue(entries, q, true); mutex_unlock(&xs->mutex); return err; } default: break; } return -ENOPROTOOPT; } static int xsk_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; struct xdp_sock *xs = xdp_sk(sock->sk); struct xsk_queue *q = NULL; unsigned long pfn; struct page *qpg; if (offset == XDP_PGOFF_RX_RING) { q = xs->rx; } else { if (!xs->umem) return -EINVAL; if (offset == XDP_UMEM_PGOFF_FILL_RING) q = xs->umem->fq; else return -EINVAL; } if (!q) return -EINVAL; qpg = virt_to_head_page(q->ring); if (size > (PAGE_SIZE << compound_order(qpg))) return -EINVAL; pfn = virt_to_phys(q->ring) >> PAGE_SHIFT; return remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot); } static struct proto xsk_proto = { .name = "XDP", .owner = THIS_MODULE, .obj_size = sizeof(struct xdp_sock), }; static const struct proto_ops xsk_proto_ops = { .family = PF_XDP, .owner = THIS_MODULE, .release = xsk_release, .bind = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = sock_no_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = xsk_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, .mmap = xsk_mmap, .sendpage = sock_no_sendpage, }; static void xsk_destruct(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); if (!sock_flag(sk, SOCK_DEAD)) return; xskq_destroy(xs->rx); xdp_put_umem(xs->umem); sk_refcnt_debug_dec(sk); } static int xsk_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct xdp_sock *xs; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; if (protocol) return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern); if (!sk) return -ENOBUFS; sock->ops = &xsk_proto_ops; sock_init_data(sock, sk); sk->sk_family = PF_XDP; sk->sk_destruct = xsk_destruct; sk_refcnt_debug_inc(sk); xs = xdp_sk(sk); mutex_init(&xs->mutex); local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); local_bh_enable(); return 0; } static const struct net_proto_family xsk_family_ops = { .family = PF_XDP, .create = xsk_create, .owner = THIS_MODULE, }; static int __init xsk_init(void) { int err; err = proto_register(&xsk_proto, 0 /* no slab */); if (err) goto out; err = sock_register(&xsk_family_ops); if (err) goto out_proto; return 0; out_proto: proto_unregister(&xsk_proto); out: return err; } fs_initcall(xsk_init);