mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 17:08:10 +00:00
63f818f46a
syzbot wrote:
> ========================================================
> WARNING: possible irq lock inversion dependency detected
> 5.6.0-syzkaller #0 Not tainted
> --------------------------------------------------------
> swapper/1/0 just changed the state of lock:
> ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840
> but this lock took another, SOFTIRQ-unsafe lock in the past:
> (&pid->wait_pidfd){+.+.}-{2:2}
>
>
> and interrupts could create inverse lock ordering between them.
>
>
> other info that might help us debug this:
> Possible interrupt unsafe locking scenario:
>
> CPU0 CPU1
> ---- ----
> lock(&pid->wait_pidfd);
> local_irq_disable();
> lock(tasklist_lock);
> lock(&pid->wait_pidfd);
> <Interrupt>
> lock(tasklist_lock);
>
> *** DEADLOCK ***
>
> 4 locks held by swapper/1/0:
The problem is that because wait_pidfd.lock is taken under the tasklist
lock. It must always be taken with irqs disabled as tasklist_lock can be
taken from interrupt context and if wait_pidfd.lock was already taken this
would create a lock order inversion.
Oleg suggested just disabling irqs where I have added extra calls to
wait_pidfd.lock. That should be safe and I think the code will eventually
do that. It was rightly pointed out by Christian that sharing the
wait_pidfd.lock was a premature optimization.
It is also true that my pre-merge window testing was insufficient. So
remove the premature optimization and give struct pid a dedicated lock of
it's own for struct pid things. I have verified that lockdep sees all 3
paths where we take the new pid->lock and lockdep does not complain.
It is my current day dream that one day pid->lock can be used to guard the
task lists as well and then the tasklist_lock won't need to be held to
deliver signals. That will require taking pid->lock with irqs disabled.
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/lkml/00000000000011d66805a25cd73f@google.com/
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Reported-by: syzbot+343f75cdeea091340956@syzkaller.appspotmail.com
Reported-by: syzbot+832aabf700bc3ec920b9@syzkaller.appspotmail.com
Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com
Reported-by: syzbot+a9fb1457d720a55d6dc5@syzkaller.appspotmail.com
Fixes: 7bc3e6e55a
("proc: Use a list of inodes to flush from proc")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
210 lines
6 KiB
C
210 lines
6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PID_H
|
|
#define _LINUX_PID_H
|
|
|
|
#include <linux/rculist.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/refcount.h>
|
|
|
|
enum pid_type
|
|
{
|
|
PIDTYPE_PID,
|
|
PIDTYPE_TGID,
|
|
PIDTYPE_PGID,
|
|
PIDTYPE_SID,
|
|
PIDTYPE_MAX,
|
|
};
|
|
|
|
/*
|
|
* What is struct pid?
|
|
*
|
|
* A struct pid is the kernel's internal notion of a process identifier.
|
|
* It refers to individual tasks, process groups, and sessions. While
|
|
* there are processes attached to it the struct pid lives in a hash
|
|
* table, so it and then the processes that it refers to can be found
|
|
* quickly from the numeric pid value. The attached processes may be
|
|
* quickly accessed by following pointers from struct pid.
|
|
*
|
|
* Storing pid_t values in the kernel and referring to them later has a
|
|
* problem. The process originally with that pid may have exited and the
|
|
* pid allocator wrapped, and another process could have come along
|
|
* and been assigned that pid.
|
|
*
|
|
* Referring to user space processes by holding a reference to struct
|
|
* task_struct has a problem. When the user space process exits
|
|
* the now useless task_struct is still kept. A task_struct plus a
|
|
* stack consumes around 10K of low kernel memory. More precisely
|
|
* this is THREAD_SIZE + sizeof(struct task_struct). By comparison
|
|
* a struct pid is about 64 bytes.
|
|
*
|
|
* Holding a reference to struct pid solves both of these problems.
|
|
* It is small so holding a reference does not consume a lot of
|
|
* resources, and since a new struct pid is allocated when the numeric pid
|
|
* value is reused (when pids wrap around) we don't mistakenly refer to new
|
|
* processes.
|
|
*/
|
|
|
|
|
|
/*
|
|
* struct upid is used to get the id of the struct pid, as it is
|
|
* seen in particular namespace. Later the struct pid is found with
|
|
* find_pid_ns() using the int nr and struct pid_namespace *ns.
|
|
*/
|
|
|
|
struct upid {
|
|
int nr;
|
|
struct pid_namespace *ns;
|
|
};
|
|
|
|
struct pid
|
|
{
|
|
refcount_t count;
|
|
unsigned int level;
|
|
spinlock_t lock;
|
|
/* lists of tasks that use this pid */
|
|
struct hlist_head tasks[PIDTYPE_MAX];
|
|
struct hlist_head inodes;
|
|
/* wait queue for pidfd notifications */
|
|
wait_queue_head_t wait_pidfd;
|
|
struct rcu_head rcu;
|
|
struct upid numbers[1];
|
|
};
|
|
|
|
extern struct pid init_struct_pid;
|
|
|
|
extern const struct file_operations pidfd_fops;
|
|
|
|
struct file;
|
|
|
|
extern struct pid *pidfd_pid(const struct file *file);
|
|
|
|
static inline struct pid *get_pid(struct pid *pid)
|
|
{
|
|
if (pid)
|
|
refcount_inc(&pid->count);
|
|
return pid;
|
|
}
|
|
|
|
extern void put_pid(struct pid *pid);
|
|
extern struct task_struct *pid_task(struct pid *pid, enum pid_type);
|
|
static inline bool pid_has_task(struct pid *pid, enum pid_type type)
|
|
{
|
|
return !hlist_empty(&pid->tasks[type]);
|
|
}
|
|
extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type);
|
|
|
|
extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
|
|
|
|
/*
|
|
* these helpers must be called with the tasklist_lock write-held.
|
|
*/
|
|
extern void attach_pid(struct task_struct *task, enum pid_type);
|
|
extern void detach_pid(struct task_struct *task, enum pid_type);
|
|
extern void change_pid(struct task_struct *task, enum pid_type,
|
|
struct pid *pid);
|
|
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
|
|
enum pid_type);
|
|
|
|
struct pid_namespace;
|
|
extern struct pid_namespace init_pid_ns;
|
|
|
|
/*
|
|
* look up a PID in the hash table. Must be called with the tasklist_lock
|
|
* or rcu_read_lock() held.
|
|
*
|
|
* find_pid_ns() finds the pid in the namespace specified
|
|
* find_vpid() finds the pid by its virtual id, i.e. in the current namespace
|
|
*
|
|
* see also find_task_by_vpid() set in include/linux/sched.h
|
|
*/
|
|
extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
|
|
extern struct pid *find_vpid(int nr);
|
|
|
|
/*
|
|
* Lookup a PID in the hash table, and return with it's count elevated.
|
|
*/
|
|
extern struct pid *find_get_pid(int nr);
|
|
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
|
|
|
|
extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
|
size_t set_tid_size);
|
|
extern void free_pid(struct pid *pid);
|
|
extern void disable_pid_allocation(struct pid_namespace *ns);
|
|
|
|
/*
|
|
* ns_of_pid() returns the pid namespace in which the specified pid was
|
|
* allocated.
|
|
*
|
|
* NOTE:
|
|
* ns_of_pid() is expected to be called for a process (task) that has
|
|
* an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid
|
|
* is expected to be non-NULL. If @pid is NULL, caller should handle
|
|
* the resulting NULL pid-ns.
|
|
*/
|
|
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
|
|
{
|
|
struct pid_namespace *ns = NULL;
|
|
if (pid)
|
|
ns = pid->numbers[pid->level].ns;
|
|
return ns;
|
|
}
|
|
|
|
/*
|
|
* is_child_reaper returns true if the pid is the init process
|
|
* of the current namespace. As this one could be checked before
|
|
* pid_ns->child_reaper is assigned in copy_process, we check
|
|
* with the pid number.
|
|
*/
|
|
static inline bool is_child_reaper(struct pid *pid)
|
|
{
|
|
return pid->numbers[pid->level].nr == 1;
|
|
}
|
|
|
|
/*
|
|
* the helpers to get the pid's id seen from different namespaces
|
|
*
|
|
* pid_nr() : global id, i.e. the id seen from the init namespace;
|
|
* pid_vnr() : virtual id, i.e. the id seen from the pid namespace of
|
|
* current.
|
|
* pid_nr_ns() : id seen from the ns specified.
|
|
*
|
|
* see also task_xid_nr() etc in include/linux/sched.h
|
|
*/
|
|
|
|
static inline pid_t pid_nr(struct pid *pid)
|
|
{
|
|
pid_t nr = 0;
|
|
if (pid)
|
|
nr = pid->numbers[0].nr;
|
|
return nr;
|
|
}
|
|
|
|
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
|
|
pid_t pid_vnr(struct pid *pid);
|
|
|
|
#define do_each_pid_task(pid, type, task) \
|
|
do { \
|
|
if ((pid) != NULL) \
|
|
hlist_for_each_entry_rcu((task), \
|
|
&(pid)->tasks[type], pid_links[type]) {
|
|
|
|
/*
|
|
* Both old and new leaders may be attached to
|
|
* the same pid in the middle of de_thread().
|
|
*/
|
|
#define while_each_pid_task(pid, type, task) \
|
|
if (type == PIDTYPE_PID) \
|
|
break; \
|
|
} \
|
|
} while (0)
|
|
|
|
#define do_each_pid_thread(pid, type, task) \
|
|
do_each_pid_task(pid, type, task) { \
|
|
struct task_struct *tg___ = task; \
|
|
for_each_thread(tg___, task) {
|
|
|
|
#define while_each_pid_thread(pid, type, task) \
|
|
} \
|
|
task = tg___; \
|
|
} while_each_pid_task(pid, type, task)
|
|
#endif /* _LINUX_PID_H */
|