From f0cc749254d12c78e93dae3b27b21dc9546843d0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 11 Jun 2023 22:48:12 +0900 Subject: [PATCH 1/2] cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex in freezer_css_{online,offline}() syzbot is again reporting a circular locking dependency between cpu_hotplug_lock and freezer_mutex. Do what we did in commit 57dcd64c7e036299 ("cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex"). Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=2ab700fe1829880a2ec6 Signed-off-by: Tetsuo Handa Tested-by: syzbot Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Tejun Heo --- kernel/cgroup/legacy_freezer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 936473203a6b..122dacb3a443 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -108,16 +108,18 @@ static int freezer_css_online(struct cgroup_subsys_state *css) struct freezer *freezer = css_freezer(css); struct freezer *parent = parent_freezer(freezer); + cpus_read_lock(); mutex_lock(&freezer_mutex); freezer->state |= CGROUP_FREEZER_ONLINE; if (parent && (parent->state & CGROUP_FREEZING)) { freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; - static_branch_inc(&freezer_active); + static_branch_inc_cpuslocked(&freezer_active); } mutex_unlock(&freezer_mutex); + cpus_read_unlock(); return 0; } @@ -132,14 +134,16 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); + cpus_read_lock(); mutex_lock(&freezer_mutex); if (freezer->state & CGROUP_FREEZING) - static_branch_dec(&freezer_active); + static_branch_dec_cpuslocked(&freezer_active); freezer->state = 0; mutex_unlock(&freezer_mutex); + cpus_read_unlock(); } static void freezer_css_free(struct cgroup_subsys_state *css) From 
6f363f5aa845561f7ea496d8b1175e3204470486 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 10 Jun 2023 17:26:43 +0800 Subject: [PATCH 2/2] cgroup: Do not corrupt task iteration when rebinding subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found a refcount UAF bug as follows: refcount_t: addition on 0; use-after-free. WARNING: CPU: 1 PID: 342 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x148 Workqueue: events cpuset_hotplug_workfn Call trace: refcount_warn_saturate+0xa0/0x148 __refcount_add.constprop.0+0x5c/0x80 css_task_iter_advance_css_set+0xd8/0x210 css_task_iter_advance+0xa8/0x120 css_task_iter_next+0x94/0x158 update_tasks_root_domain+0x58/0x98 rebuild_root_domains+0xa0/0x1b0 rebuild_sched_domains_locked+0x144/0x188 cpuset_hotplug_workfn+0x138/0x5a0 process_one_work+0x1e8/0x448 worker_thread+0x228/0x3e0 kthread+0xe0/0xf0 ret_from_fork+0x10/0x20 then a kernel panic will be triggered as below: Unable to handle kernel paging request at virtual address 00000000c0000010 Call trace: cgroup_apply_control_disable+0xa4/0x16c rebind_subsystems+0x224/0x590 cgroup_destroy_root+0x64/0x2e0 css_free_rwork_fn+0x198/0x2a0 process_one_work+0x1d4/0x4bc worker_thread+0x158/0x410 kthread+0x108/0x13c ret_from_fork+0x10/0x18 The race that causes this bug can be shown as below: (hotplug cpu) | (umount cpuset) mutex_lock(&cpuset_mutex) | mutex_lock(&cgroup_mutex) cpuset_hotplug_workfn | rebuild_root_domains | rebind_subsystems update_tasks_root_domain | spin_lock_irq(&css_set_lock) css_task_iter_start | list_move_tail(&cset->e_cset_node[ss->id] while(css_task_iter_next) | &dcgrp->e_csets[ss->id]); css_task_iter_end | spin_unlock_irq(&css_set_lock) mutex_unlock(&cpuset_mutex) | mutex_unlock(&cgroup_mutex) Inside css_task_iter_start/next/end, css_set_lock is held and then released, so when iterating tasks (left side), the css_set may be moved to another list (right side), then it->cset_head points to the old list head and 
it->cset_pos->next points to the head node of new list, which can't be used as struct css_set. To fix this issue, switch from all css_sets to only scgrp's css_sets to patch in-flight iterators to preserve correct iteration, and then update it->cset_head as well. Reported-by: Gaosheng Cui Link: https://www.spinics.net/lists/cgroups/msg37935.html Suggested-by: Michal Koutný Link: https://lore.kernel.org/all/20230526114139.70274-1-xiujianfeng@huaweicloud.com/ Signed-off-by: Xiu Jianfeng Fixes: 2d8f243a5e6e ("cgroup: implement cgroup->e_csets[]") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 245cf62ce85a..4d42f0cbc11e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1798,7 +1798,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - int ssid, i, ret; + int ssid, ret; u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1842,7 +1842,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; struct cgroup_subsys_state *css = cgroup_css(scgrp, ss); - struct css_set *cset; + struct css_set *cset, *cset_pos; + struct css_task_iter *it; WARN_ON(!css || cgroup_css(dcgrp, ss)); @@ -1860,9 +1861,22 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) css->cgroup = dcgrp; spin_lock_irq(&css_set_lock); - hash_for_each(css_set_table, i, cset, hlist) + WARN_ON(!list_empty(&dcgrp->e_csets[ss->id])); + list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id], + e_cset_node[ss->id]) { list_move_tail(&cset->e_cset_node[ss->id], &dcgrp->e_csets[ss->id]); + /* + * all css_sets of scgrp together in same order to dcgrp, + * patch in-flight iterators to preserve correct 
iteration. + * since the iterator is always advanced right away and + * finished when it->cset_pos meets it->cset_head, so only + * update it->cset_head is enough here. + */ + list_for_each_entry(it, &cset->task_iters, iters_node) + if (it->cset_head == &scgrp->e_csets[ss->id]) + it->cset_head = &dcgrp->e_csets[ss->id]; + } spin_unlock_irq(&css_set_lock); if (ss->css_rstat_flush) {