linux-stable/kernel/hung_task.c

296 lines
6.8 KiB
C
Raw Permalink Normal View History

/*
* Detect Hung Task
*
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <trace/events/sched.h>
/*
* The number of tasks checked:
*/
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
/*
* Limit number of tasks checked in a batch.
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bound.
*/
#define HUNG_TASK_LOCK_BREAK (HZ / 10)
/*
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
static bool hung_task_show_lock;
kernel/hung_task.c: show all hung tasks before panic [ Upstream commit 401c636a0eeb0d51862fce222da1bf08e3a0ffd0 ] When we get a hung task it can often be valuable to see _all_ the hung tasks on the system before calling panic(). Quoting from https://syzkaller.appspot.com/text?tag=CrashReport&id=5316056503549952 ---------------------------------------- INFO: task syz-executor0:6540 blocked for more than 120 seconds. Not tainted 4.16.0+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor0 D23560 6540 4521 0x80000004 Call Trace: context_switch kernel/sched/core.c:2848 [inline] __schedule+0x8fb/0x1ef0 kernel/sched/core.c:3490 schedule+0xf5/0x430 kernel/sched/core.c:3549 schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3607 __mutex_lock_common kernel/locking/mutex.c:833 [inline] __mutex_lock+0xb7f/0x1810 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0x1759/0x1e00 block/ioctl.c:601 ioctl_by_bdev+0xa5/0x110 fs/block_dev.c:2060 isofs_get_last_session fs/isofs/inode.c:567 [inline] isofs_fill_super+0x2ba9/0x3bc0 fs/isofs/inode.c:660 mount_bdev+0x2b7/0x370 fs/super.c:1119 isofs_mount+0x34/0x40 fs/isofs/inode.c:1560 mount_fs+0x66/0x2d0 fs/super.c:1222 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:2514 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0xea4/0x2b90 fs/namespace.c:2847 ksys_mount+0xab/0x120 fs/namespace.c:3063 SYSC_mount fs/namespace.c:3077 [inline] SyS_mount+0x39/0x50 fs/namespace.c:3074 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 (...snipped...) Showing all locks held in the system: (...snipped...) 2 locks held by syz-executor0/6540: #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: alloc_super fs/super.c:211 [inline] #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: sget_userns+0x3b2/0xe60 fs/super.c:502 /* down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); */ #1: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ (...snipped...) 3 locks held by syz-executor7/6541: #0: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ #1: 000000007bf3d3f9 (&bdev->bd_mutex){+.+.}, at: blkdev_reread_part+0x1e/0x40 block/ioctl.c:192 #2: 00000000566d4c39 (&type->s_umount_key#50){.+.+}, at: __get_super.part.10+0x1d3/0x280 fs/super.c:663 /* down_read(&sb->s_umount); */ ---------------------------------------- When reporting an AB-BA deadlock like shown above, it would be nice if trace of PID=6541 is printed as well as trace of PID=6540 before calling panic(). Showing hung tasks up to /proc/sys/kernel/hung_task_warnings could delay calling panic() but normally there should not be so many hung tasks. Link: http://lkml.kernel.org/r/201804050705.BHE57833.HVFOFtSOMQJFOL@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Dmitry Vyukov <dvyukov@google.com> Cc: Vegard Nossum <vegard.nossum@oracle.com> Cc: Mandeep Singh Baines <msb@chromium.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-08 00:10:34 +00:00
static bool hung_task_call_panic;
static struct task_struct *watchdog_task;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
static int __init hung_task_panic_setup(char *str)
{
int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
if (rc)
return rc;
return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
/*
* Ensure the task is not frozen.
* Also, skip vfork and any other user process that freezer should skip.
*/
if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
return;
/*
* When a freshly created task is scheduled once, changes its state to
* TASK_UNINTERRUPTIBLE without having ever been switched out once, it
* musn't be checked.
*/
if (unlikely(!switch_count))
return;
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
return;
}
trace_sched_process_hang(t);
kernel/hung_task.c: force console verbose before panic [ Upstream commit 168e06f7937d96c7222037d8a05565e8a6eb00fe ] Based on commit 401c636a0eeb ("kernel/hung_task.c: show all hung tasks before panic"), we could get the call stack of hung task. However, if the console loglevel is not high, we still can not see the useful panic information in practice, and in most cases users don't set console loglevel to high level. This patch is to force console verbose before system panic, so that the real useful information can be seen in the console, instead of being like the following, which doesn't have hung task information. INFO: task init:1 blocked for more than 120 seconds. Tainted: G U W 4.19.0-quilt-2e5dc0ac-g51b6c21d76cc #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. Kernel panic - not syncing: hung_task: blocked tasks CPU: 2 PID: 479 Comm: khungtaskd Tainted: G U W 4.19.0-quilt-2e5dc0ac-g51b6c21d76cc #1 Call Trace: dump_stack+0x4f/0x65 panic+0xde/0x231 watchdog+0x290/0x410 kthread+0x12c/0x150 ret_from_fork+0x35/0x40 reboot: panic mode set: p,w Kernel Offset: 0x34000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Link: http://lkml.kernel.org/r/27240C0AC20F114CBF8149A2696CBE4A6015B675@SHSMSX101.ccr.corp.intel.com Signed-off-by: Chuansheng Liu <chuansheng.liu@intel.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-01-03 23:26:27 +00:00
if (sysctl_hung_task_panic) {
console_verbose();
hung_task_show_lock = true;
hung_task_call_panic = true;
}
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
if (sysctl_hung_task_warnings) {
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
t->comm, t->pid, timeout);
pr_err(" %s %s %.*s\n",
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
hung_task_show_lock = true;
}
touch_nmi_watchdog();
}
/*
* To avoid extending the RCU grace period for an unbounded amount of time,
* periodically exit the critical section and enter a new one.
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* to exit the grace period. For classic RCU, a reschedule is required.
*/
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
bool can_cont;
get_task_struct(g);
get_task_struct(t);
rcu_read_unlock();
cond_resched();
rcu_read_lock();
can_cont = pid_alive(g) && pid_alive(t);
put_task_struct(t);
put_task_struct(g);
return can_cont;
}
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time (120 seconds). If that happens, print out
* a warning.
*/
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
unsigned long last_break = jiffies;
struct task_struct *g, *t;
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
*/
if (test_taint(TAINT_DIE) || did_panic)
return;
hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
goto unlock;
if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
if (!rcu_lock_break(g, t))
goto unlock;
last_break = jiffies;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);
}
unlock:
rcu_read_unlock();
if (hung_task_show_lock)
debug_show_all_locks();
kernel/hung_task.c: show all hung tasks before panic [ Upstream commit 401c636a0eeb0d51862fce222da1bf08e3a0ffd0 ] When we get a hung task it can often be valuable to see _all_ the hung tasks on the system before calling panic(). Quoting from https://syzkaller.appspot.com/text?tag=CrashReport&id=5316056503549952 ---------------------------------------- INFO: task syz-executor0:6540 blocked for more than 120 seconds. Not tainted 4.16.0+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor0 D23560 6540 4521 0x80000004 Call Trace: context_switch kernel/sched/core.c:2848 [inline] __schedule+0x8fb/0x1ef0 kernel/sched/core.c:3490 schedule+0xf5/0x430 kernel/sched/core.c:3549 schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3607 __mutex_lock_common kernel/locking/mutex.c:833 [inline] __mutex_lock+0xb7f/0x1810 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0x1759/0x1e00 block/ioctl.c:601 ioctl_by_bdev+0xa5/0x110 fs/block_dev.c:2060 isofs_get_last_session fs/isofs/inode.c:567 [inline] isofs_fill_super+0x2ba9/0x3bc0 fs/isofs/inode.c:660 mount_bdev+0x2b7/0x370 fs/super.c:1119 isofs_mount+0x34/0x40 fs/isofs/inode.c:1560 mount_fs+0x66/0x2d0 fs/super.c:1222 vfs_kern_mount.part.26+0xc6/0x4a0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:2514 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0xea4/0x2b90 fs/namespace.c:2847 ksys_mount+0xab/0x120 fs/namespace.c:3063 SYSC_mount fs/namespace.c:3077 [inline] SyS_mount+0x39/0x50 fs/namespace.c:3074 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 (...snipped...) Showing all locks held in the system: (...snipped...) 2 locks held by syz-executor0/6540: #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: alloc_super fs/super.c:211 [inline] #0: 00000000566d4c39 (&type->s_umount_key#49/1){+.+.}, at: sget_userns+0x3b2/0xe60 fs/super.c:502 /* down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); */ #1: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ (...snipped...) 3 locks held by syz-executor7/6541: #0: 0000000043ca8836 (&lo->lo_ctl_mutex/1){+.+.}, at: lo_ioctl+0x8b/0x1b70 drivers/block/loop.c:1355 /* mutex_lock_nested(&lo->lo_ctl_mutex, 1); */ #1: 000000007bf3d3f9 (&bdev->bd_mutex){+.+.}, at: blkdev_reread_part+0x1e/0x40 block/ioctl.c:192 #2: 00000000566d4c39 (&type->s_umount_key#50){.+.+}, at: __get_super.part.10+0x1d3/0x280 fs/super.c:663 /* down_read(&sb->s_umount); */ ---------------------------------------- When reporting an AB-BA deadlock like shown above, it would be nice if trace of PID=6541 is printed as well as trace of PID=6540 before calling panic(). Showing hung tasks up to /proc/sys/kernel/hung_task_warnings could delay calling panic() but normally there should not be so many hung tasks. Link: http://lkml.kernel.org/r/201804050705.BHE57833.HVFOFtSOMQJFOL@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Dmitry Vyukov <dvyukov@google.com> Cc: Vegard Nossum <vegard.nossum@oracle.com> Cc: Mandeep Singh Baines <msb@chromium.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-08 00:10:34 +00:00
if (hung_task_call_panic) {
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
}
static long hung_timeout_jiffies(unsigned long last_checked,
unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
return timeout ? last_checked - jiffies + timeout * HZ :
MAX_SCHEDULE_TIMEOUT;
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
wake_up_process(watchdog_task);
out:
return ret;
}
static atomic_t reset_hung_task = ATOMIC_INIT(0);
void reset_hung_task_detector(void)
{
atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);
static bool hung_detector_suspended;
static int hungtask_pm_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
case PM_RESTORE_PREPARE:
hung_detector_suspended = true;
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
case PM_POST_RESTORE:
hung_detector_suspended = false;
break;
default:
break;
}
return NOTIFY_OK;
}
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog(void *dummy)
{
unsigned long hung_last_checked = jiffies;
set_user_nice(current, 0);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
long t = hung_timeout_jiffies(hung_last_checked, timeout);
if (t <= 0) {
if (!atomic_xchg(&reset_hung_task, 0) &&
!hung_detector_suspended)
check_hung_uninterruptible_tasks(timeout);
hung_last_checked = jiffies;
continue;
}
schedule_timeout_interruptible(t);
}
return 0;
}
static int __init hung_task_init(void)
{
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
/* Disable hung task detector on suspend */
pm_notifier(hungtask_pm_notify, 0);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
return 0;
}
2014-04-03 21:48:35 +00:00
subsys_initcall(hung_task_init);