linux-stable/include/linux/eventpoll.h

90 lines
2.4 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* include/linux/eventpoll.h ( Efficient event polling implementation )
* Copyright (C) 2001,...,2006 Davide Libenzi
*
* Davide Libenzi <davidel@xmailserver.org>
*/
#ifndef _LINUX_EVENTPOLL_H
#define _LINUX_EVENTPOLL_H
#include <uapi/linux/eventpoll.h>
kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and closed then so we won't find this descriptor in the task fdinfo list. Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrey Vagin <avagin@openvz.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Pavel Emelyanov <xemul@virtuozzo.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Jason Baron <jbaron@akamai.com> Cc: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-07-12 21:34:28 +00:00
#include <uapi/linux/kcmp.h>
/* Forward declarations to avoid compiler errors */
struct file;
#ifdef CONFIG_EPOLL
kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE Userspace has discovered the functionality offered by SYS_kcmp and has started to depend upon it. In particular, Mesa uses SYS_kcmp for os_same_file_description() in order to identify when two fd (e.g. device or dmabuf) point to the same struct file. Since they depend on it for core functionality, lift SYS_kcmp out of the non-default CONFIG_CHECKPOINT_RESTORE into the selectable syscall category. Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to deduplicate the per-service file descriptor store. Note that some distributions such as Ubuntu are already enabling CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp. References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Kees Cook <keescook@chromium.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Will Drewry <wad@chromium.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Dave Airlie <airlied@gmail.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Lucas Stach <l.stach@pengutronix.de> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: stable@vger.kernel.org Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> # DRM depends on kcmp Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> # systemd uses kcmp Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> Reviewed-by: Kees Cook <keescook@chromium.org> Acked-by: Thomas Zimmermann <tzimmermann@suse.de> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210205220012.1983-1-chris@chris-wilson.co.uk
2021-02-05 22:00:12 +00:00
#ifdef CONFIG_KCMP
kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and closed then so we won't find this descriptor in the task fdinfo list. Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrey Vagin <avagin@openvz.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Pavel Emelyanov <xemul@virtuozzo.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Jason Baron <jbaron@akamai.com> Cc: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-07-12 21:34:28 +00:00
struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
#endif
kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and closed then so we won't find this descriptor in the task fdinfo list. Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrey Vagin <avagin@openvz.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Pavel Emelyanov <xemul@virtuozzo.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Jason Baron <jbaron@akamai.com> Cc: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-07-12 21:34:28 +00:00
/* Used to release the epoll bits inside the "struct file" */
void eventpoll_release_file(struct file *file);
/*
* This is called from inside fs/file_table.c:__fput() to unlink files
* from the eventpoll interface. We need to have this facility to cleanup
* correctly files that are closed without being removed from the eventpoll
* interface.
*/
static inline void eventpoll_release(struct file *file)
{
/*
* Fast check to avoid the get/release of the semaphore. Since
* we're doing this outside the semaphore lock, it might return
* false negatives, but we don't care. It'll help in 99.99% of cases
* to avoid the semaphore lock. False positives simply cannot happen
* because the file in on the way to be removed and nobody ( but
* eventpoll ) has still a reference to this file.
*/
epoll: take epitem list out of struct file Move the head of epitem list out of struct file; for epoll ones it's moved into struct eventpoll (->refs there), for non-epoll - into the new object (struct epitem_head). In place of ->f_ep_links we leave a pointer to the list head (->f_ep). ->f_ep is protected by ->f_lock and it's zeroed as soon as the list of epitems becomes empty (that can happen only in ep_remove() by now). The list of files for reverse path check is *not* going through struct file now - it's a single-linked list going through epitem_head instances. It's terminated by ERR_PTR(-1) (== EP_UNACTIVE_POINTER), so the elements of list can be distinguished by head->next != NULL. epitem_head instances are allocated at ep_insert() time (by attach_epitem()) and freed either by ep_remove() (if it empties the set of epitems *and* epitem_head does not belong to the reverse path check list) or by clear_tfile_check_list() when the list is emptied (if the set of epitems is empty by that point). Allocations are done from a separate slab - minimal kmalloc() size is too large on some architectures. As the result, we trim struct file _and_ get rid of the games with temporary file references. Locking and barriers are interesting (aren't they always); see unlist_file() and ep_remove() for details. The non-obvious part is that ep_remove() needs to decide if it will be the one to free the damn thing *before* actually storing NULL to head->epitems.first - that's what smp_load_acquire is for in there. unlist_file() lockless path is safe, since we hit it only if we observe NULL in head->epitems.first and whoever had done that store is guaranteed to have observed non-NULL in head->next. IOW, their last access had been the store of NULL into ->epitems.first and we can safely free the sucker. OTOH, we are under rcu_read_lock() and both epitem and epitem->file have their freeing RCU-delayed. So if we see non-NULL ->epitems.first, we can grab ->f_lock (all epitems in there share the same struct file) and safely recheck the emptiness of ->epitems; again, ->next is still non-NULL, so ep_remove() couldn't have freed head yet. ->f_lock serializes us wrt ep_remove(); the rest is trivial. Note that once head->epitems becomes NULL, nothing can get inserted into it - the only remaining reference to head after that point is from the reverse path check list. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2020-10-02 00:45:51 +00:00
if (likely(!file->f_ep))
return;
/*
* The file is being closed while it is still linked to an epoll
* descriptor. We need to handle this by correctly unlinking it
* from its containers.
*/
eventpoll_release_file(file);
}
int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
bool nonblock);
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
return op != EPOLL_CTL_DEL;
}
#else
static inline void eventpoll_release(struct file *file) {}
#endif
#if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT)
/* ARM OABI has an incompatible struct layout and needs a special handler */
extern struct epoll_event __user *
epoll_put_uevent(__poll_t revents, __u64 data,
struct epoll_event __user *uevent);
#else
static inline struct epoll_event __user *
epoll_put_uevent(__poll_t revents, __u64 data,
struct epoll_event __user *uevent)
{
if (__put_user(revents, &uevent->events) ||
__put_user(data, &uevent->data))
return NULL;
return uevent+1;
}
#endif
#endif /* #ifndef _LINUX_EVENTPOLL_H */