From 0fc0287c9ed1ffd3706f8b4d9b314aa102ef1245 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Nov 2013 15:03:41 +0100 Subject: [PATCH 1/2] cpuset: Fix memory allocator deadlock Juri hit the below lockdep report: [ 4.303391] ====================================================== [ 4.303392] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 4.303394] 3.12.0-dl-peterz+ #144 Not tainted [ 4.303395] ------------------------------------------------------ [ 4.303397] kworker/u4:3/689 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 4.303399] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x6c/0x290 [ 4.303417] [ 4.303417] and this task is already holding: [ 4.303418] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x5b/0x100 [ 4.303431] which would create a new lock dependency: [ 4.303432] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 4.303436] [ 4.303898] the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: [ 4.303918] -> (&p->mems_allowed_seq){+.+...} ops: 2762 { [ 4.303922] HARDIRQ-ON-W at: [ 4.303923] [] __lock_acquire+0x65a/0x1ff0 [ 4.303926] [] lock_acquire+0x93/0x140 [ 4.303929] [] kthreadd+0x86/0x180 [ 4.303931] [] ret_from_fork+0x7c/0xb0 [ 4.303933] SOFTIRQ-ON-W at: [ 4.303933] [] __lock_acquire+0x68c/0x1ff0 [ 4.303935] [] lock_acquire+0x93/0x140 [ 4.303940] [] kthreadd+0x86/0x180 [ 4.303955] [] ret_from_fork+0x7c/0xb0 [ 4.303959] INITIAL USE at: [ 4.303960] [] __lock_acquire+0x344/0x1ff0 [ 4.303963] [] lock_acquire+0x93/0x140 [ 4.303966] [] kthreadd+0x86/0x180 [ 4.303969] [] ret_from_fork+0x7c/0xb0 [ 4.303972] } Which reports that we take mems_allowed_seq with interrupts enabled. A little digging found that this can only be from cpuset_change_task_nodemask(). This is an actual deadlock because an interrupt doing an allocation will hit get_mems_allowed()->...->__read_seqcount_begin(), which will spin forever waiting for the write side to complete. Cc: John Stultz Cc: Mel Gorman Reported-by: Juri Lelli Signed-off-by: Peter Zijlstra Tested-by: Juri Lelli Acked-by: Li Zefan Acked-by: Mel Gorman Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cpuset.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6bf981e13c43..4772034b4b17 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, need_loop = task_has_mempolicy(tsk) || !nodes_intersects(*newmems, tsk->mems_allowed); - if (need_loop) + if (need_loop) { + local_irq_disable(); write_seqcount_begin(&tsk->mems_allowed_seq); + } nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); @@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); tsk->mems_allowed = *newmems; - if (need_loop) + if (need_loop) { write_seqcount_end(&tsk->mems_allowed_seq); + local_irq_enable(); + } task_unlock(tsk); } From e605b36575e896edd8161534550c9ea021b03bc0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Nov 2013 18:16:21 -0500 Subject: [PATCH 2/2] cgroup: fix cgroup_subsys_state leak for seq_files If a cgroup file implements either read_map() or read_seq_string(), such file is served using seq_file by overriding file->f_op to cgroup_seqfile_operations, which also overrides the release method to single_release() from cgroup_file_release(). Because cgroup_file_open() didn't use to acquire any resources, this used to be fine, but since f7d58818ba42 ("cgroup: pin cgroup_subsys_state when opening a cgroupfs file"), cgroup_file_open() pins the css (cgroup_subsys_state) which is put by cgroup_file_release(). The patch forgot to update the release path for seq_files and each open/release cycle leaks a css reference. Fix it by updating cgroup_file_release() to also handle seq_files and using it for seq_file release path too. Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.12 --- kernel/cgroup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7b98ee35ef7..8b729c278b64 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -199,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp); static int cgroup_destroy_locked(struct cgroup *cgrp); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); +static int cgroup_file_release(struct inode *inode, struct file *file); /** * cgroup_css - obtain a cgroup's css for the specified subsystem @@ -2429,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = { .read = seq_read, .write = cgroup_file_write, .llseek = seq_lseek, - .release = single_release, + .release = cgroup_file_release, }; static int cgroup_file_open(struct inode *inode, struct file *file) @@ -2490,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file) ret = cft->release(inode, file); if (css->ss) css_put(css); + if (file->f_op == &cgroup_seqfile_operations) + single_release(inode, file); return ret; }