linux-stable/include/linux/resume_user_mode.h

/* SPDX-License-Identifier: GPL-2.0-only */

#ifndef LINUX_RESUME_USER_MODE_H
#define LINUX_RESUME_USER_MODE_H

#include <linux/sched.h>
#include <linux/task_work.h>
#include <linux/memcontrol.h>
#include <linux/blk-cgroup.h>

/**
 * set_notify_resume - request a resume_user_mode_work() pass for a task
 * @task:		task that should run resume_user_mode_work()
 *
 * Marks @task with %TIF_NOTIFY_RESUME so that it runs
 * resume_user_mode_work() on its way back to user mode.  A task that is
 * currently executing in user mode is kicked so that it enters the kernel
 * and runs resume_user_mode_work() soon.  A blocked task is not woken.
 *
 * The kick is only issued by the caller that actually sets the flag; if
 * the flag was already set, nothing further is done.
 */
static inline void set_notify_resume(struct task_struct *task)
{
	/* Flag was already set: a notification is pending, no kick needed. */
	if (test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
		return;

	kick_process(task);
}


/**
 * resume_user_mode_work - Perform work before returning to user mode
 * @regs:		user-mode registers of @current task
 *
 * This is called when %TIF_NOTIFY_RESUME has been set.  Now we are
 * about to return to user mode, and the user state in @regs can be
 * inspected or adjusted.  This function clears %TIF_NOTIFY_RESUME
 * itself before doing any work.  If the flag gets set again
 * asynchronously, this will be called again before we return to
 * user mode.
 *
 * In order, this function: runs pending task work, drops the cached
 * request key (only with CONFIG_KEYS_REQUEST_CACHE), calls the memory
 * cgroup over-high handler, calls the blk-cgroup throttle hook, and
 * finally calls the rseq notify-resume handler with @regs.
 *
 * Called without locks.
 */
static inline void resume_user_mode_work(struct pt_regs *regs)
{
	/* Clear the flag first so a request that races with us sets it again. */
	clear_thread_flag(TIF_NOTIFY_RESUME);
	/*
	 * This barrier pairs with task_work_add()->set_notify_resume() after
	 * hlist_add_head(task->task_works);
	 */
	smp_mb__after_atomic();
	if (unlikely(task_work_pending(current)))
		task_work_run();

#ifdef CONFIG_KEYS_REQUEST_CACHE
	/* Release the cached request key, if any, and forget the pointer. */
	if (unlikely(current->cached_requested_key)) {
		key_put(current->cached_requested_key);
		current->cached_requested_key = NULL;
	}
#endif

	/*
	 * NOTE(review): GFP_KERNEL is presumably safe here because this path
	 * is called without locks (see above) - confirm against callers.
	 */
	mem_cgroup_handle_over_high(GFP_KERNEL);
	blkcg_maybe_throttle_current();

	/* The first argument is passed as NULL; only @regs is supplied. */
	rseq_handle_notify_resume(NULL, regs);
}

#endif /* LINUX_RESUME_USER_MODE_H */
resume_user_mode: Move to resume_user_mode.h Move set_notify_resume and tracehook_notify_resume into resume_user_mode.h. While doing that rename tracehook_notify_resume to resume_user_mode_work. Update all of the places that included tracehook.h for these functions to include resume_user_mode.h instead. Update all of the callers of tracehook_notify_resume to call resume_user_mode_work. Reviewed-by: Kees Cook <keescook@chromium.org> Link: https://lkml.kernel.org/r/20220309162454.123006-12-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> 2022-02-09 18:20:45 +00:00			`/* SPDX-License-Identifier: GPL-2.0-only */`

			`#ifndef LINUX_RESUME_USER_MODE_H`
			`#define LINUX_RESUME_USER_MODE_H`

			`#include <linux/sched.h>`
			`#include <linux/task_work.h>`
			`#include <linux/memcontrol.h>`
			`#include <linux/blk-cgroup.h>`

			`/**`
			`* set_notify_resume - cause resume_user_mode_work() to be called`
			`* @task: task that will call resume_user_mode_work()`
			`*`
			`* Calling this arranges that @task will call resume_user_mode_work()`
			`* before returning to user mode. If it's already running in user mode,`
			`* it will enter the kernel and call resume_user_mode_work() soon.`
			`* If it's blocked, it will not be woken.`
			`*/`
			`static inline void set_notify_resume(struct task_struct *task)`
			`{`
			`if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))`
			`kick_process(task);`
			`}`


			`/**`
			`* resume_user_mode_work - Perform work before returning to user mode`
			`* @regs: user-mode registers of @current task`
			`*`
			`* This is called when %TIF_NOTIFY_RESUME has been set. Now we are`
			`* about to return to user mode, and the user state in @regs can be`
			`* inspected or adjusted. The caller in arch code has cleared`
			`* %TIF_NOTIFY_RESUME before the call. If the flag gets set again`
			`* asynchronously, this will be called again before we return to`
			`* user mode.`
			`*`
			`* Called without locks.`
			`*/`
			`static inline void resume_user_mode_work(struct pt_regs *regs)`
			`{`
			`clear_thread_flag(TIF_NOTIFY_RESUME);`
			`/*`
			`* This barrier pairs with task_work_add()->set_notify_resume() after`
			`* hlist_add_head(task->task_works);`
			`*/`
			`smp_mb__after_atomic();`
			`if (unlikely(task_work_pending(current)))`
			`task_work_run();`

			`#ifdef CONFIG_KEYS_REQUEST_CACHE`
			`if (unlikely(current->cached_requested_key)) {`
			`key_put(current->cached_requested_key);`
			`current->cached_requested_key = NULL;`
			`}`
			`#endif`

mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement Breno and Josef report a deadlock scenario from cgroup reclaim re-entering the filesystem: [ 361.546690] ====================================================== [ 361.559210] WARNING: possible circular locking dependency detected [ 361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S E [ 361.589704] ------------------------------------------------------ [ 361.602277] find/9315 is trying to acquire lock: [ 361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0 [ 361.631437] [ 361.631437] but task is already holding lock: [ 361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40 [ 362.904457] mutex_lock_nested+0x1c/0x30 [ 362.912414] __btrfs_release_delayed_node+0x68/0x4f0 [ 362.922460] btrfs_evict_inode+0x301/0x770 [ 362.982726] evict+0x17c/0x380 [ 362.988944] prune_icache_sb+0x100/0x1d0 [ 363.005559] super_cache_scan+0x1f8/0x260 [ 363.013695] do_shrink_slab+0x2a2/0x540 [ 363.021489] shrink_slab_memcg+0x237/0x3d0 [ 363.050606] shrink_slab+0xa7/0x240 [ 363.083382] shrink_node_memcgs+0x262/0x3b0 [ 363.091870] shrink_node+0x1a4/0x720 [ 363.099150] shrink_zones+0x1f6/0x5d0 [ 363.148798] do_try_to_free_pages+0x19b/0x5e0 [ 363.157633] try_to_free_mem_cgroup_pages+0x266/0x370 [ 363.190575] reclaim_high+0x16f/0x1f0 [ 363.208409] mem_cgroup_handle_over_high+0x10b/0x270 [ 363.246678] try_charge_memcg+0xaf2/0xc70 [ 363.304151] charge_memcg+0xf0/0x350 [ 363.320070] __mem_cgroup_charge+0x28/0x40 [ 363.328371] __filemap_add_folio+0x870/0xd50 [ 363.371303] filemap_add_folio+0xdd/0x310 [ 363.399696] __filemap_get_folio+0x2fc/0x7d0 [ 363.419086] pagecache_get_page+0xe/0x30 [ 363.427048] alloc_extent_buffer+0x1cd/0x6a0 [ 363.435704] read_tree_block+0x43/0xc0 [ 363.443316] read_block_for_search+0x361/0x510 [ 363.466690] btrfs_search_slot+0xc8c/0x1520 This is caused by the mem_cgroup_handle_over_high() 
not respecting the gfp_mask of the allocation context. We used to only call this function on resume to userspace, where no locks were held. But c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added a call from the allocation context without considering the gfp. Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reported-by: Breno Leitao <leitao@debian.org> Reported-by: Josef Bacik <josef@toxicpanda.com> Acked-by: Shakeel Butt <shakeelb@google.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Muchun Song <songmuchun@bytedance.com> Cc: <stable@vger.kernel.org> [5.17+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> 2023-09-14 15:21:39 +00:00			`mem_cgroup_handle_over_high(GFP_KERNEL);`
resume_user_mode: Move to resume_user_mode.h Move set_notify_resume and tracehook_notify_resume into resume_user_mode.h. While doing that rename tracehook_notify_resume to resume_user_mode_work. Update all of the places that included tracehook.h for these functions to include resume_user_mode.h instead. Update all of the callers of tracehook_notify_resume to call resume_user_mode_work. Reviewed-by: Kees Cook <keescook@chromium.org> Link: https://lkml.kernel.org/r/20220309162454.123006-12-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> 2022-02-09 18:20:45 +00:00			`blkcg_maybe_throttle_current();`

			`rseq_handle_notify_resume(NULL, regs);`
			`}`

			`#endif /* LINUX_RESUME_USER_MODE_H */`