From a80e49e2cc3145af014a8ae44f575829cc236192 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 17:47:18 +0200 Subject: [PATCH 1/8] nohz: Move nohz full init call to tick init This way we unbloat a bit main.c and more importantly we initialize nohz full after init_IRQ(). This dependency will be needed in further patches because nohz full needs irq work to raise its own IRQ. Information about the support for this ability on ARM64 is obtained on init_IRQ() which initialize the pointer to __smp_call_function. Since tick_init() is called right after init_IRQ(), this is a good place to call tick_nohz_init() and prepare for that dependency. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 2 -- init/main.c | 1 - kernel/time/tick-common.c | 1 + kernel/time/tick-internal.h | 7 +++++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a82c7dc3fdd..595ee86f5e0d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -181,14 +181,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } -extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else -static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void __tick_nohz_full_check(void) { } diff --git a/init/main.c b/init/main.c index bb1aed928f21..8af2f1abfe38 100644 --- a/init/main.c +++ b/init/main.c @@ -577,7 +577,6 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); idr_init_cache(); rcu_init(); - tick_nohz_init(); context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0a0608edeb26..052b4b53c3d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -400,4 +400,5 @@ void tick_resume(void) void __init tick_init(void) { tick_broadcast_init(); + tick_nohz_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c19c1d84b6f3..366aeb4f2c66 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -99,6 +99,13 @@ static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif + /* * Broadcasting support */ From c5c38ef3d70377dc504a6a3f611a3ec814bc757b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Sep 2014 15:43:02 +0200 Subject: [PATCH 2/8] irq_work: Introduce arch_irq_work_has_interrupt() The nohz full code needs irq work to trigger its own interrupt so that the subsystem can work even when the tick is stopped. Lets introduce arch_irq_work_has_interrupt() that archs can override to tell about their support for this ability. Signed-off-by: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 3 ++- arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/irq_work.h | 10 ++++++++++ include/linux/irq_work.h | 2 ++ 31 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/irq_work.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index e858aa0ad8af..a52cbf178c3a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index e76fd79f32b0..b8fffc1a2ac2 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 70cd84eb7fda..202905e7ea0c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 0b3fcf86e6ba..d617789b1ebd 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -9,8 +9,8 @@ generic-y += current.h generic-y += delay.h generic-y += div64.h generic-y += dma.h -generic-y += emergency-restart.h generic-y += early_ioremap.h +generic-y += emergency-restart.h generic-y += errno.h generic-y += ftrace.h generic-y += hash.h @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 00a0f3ccd6eb..2a71b1cb9848 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += futex.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 0d93b9a79ca9..46ed6bb9c679 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 8dbdce8421b0..e77e0c1dbe75 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 31742dfadff9..802b94c4ca86 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += linkage.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 5b73921b6e9d..3caf05cabfc5 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 0e69796b58c7..5f234a5a2320 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += ioctls.h generic-y += iomap.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index e8317d2d6c8d..747320be9d0e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index accc10a3dc78..e02448b0648b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index c67c94a2d672..dbaf9f3065e8 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index c29ead89a317..7b8111c8f937 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 27a3acda6c19..448143b8cabd 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 335e5290ec75..57012ef1f51e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += cputime.h generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h +generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index ecbd6676bd33..77eb1a68d13b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 480af0d9c2f5..89b61d7dc790 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ecf25e6678ad..ffb024b8423f 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += exec.h generic-y += hash.h generic-y += hw_irq.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7f23f162ce9c..31e8f59aff38 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index b3fea0722ff1..773f86676588 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index aad209199f7e..1909d2a5b82f 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index c19e47dacb31..5a6c9acff0d2 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cdd1b447bb6c..f5f94ce1692c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += emergency-restart.h generic-y += exec.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 0aa5675e7025..e6462b8a6284 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 7bd64aa2e94a..244b12c8cb39 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += hash.h generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1e5fb872a4aa..5a2bb53faa42 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3bf000fab0ae..8fa909cc6553 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,5 +7,6 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h generic-y += early_ioremap.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c3d20ba6eb86..105d38922c44 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hardirq.h generic-y += hash.h generic-y += ioctl.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/include/asm-generic/irq_work.h b/include/asm-generic/irq_work.h new file mode 100644 index 000000000000..a44f452c6590 --- /dev/null +++ b/include/asm-generic/irq_work.h @@ -0,0 +1,10 @@ +#ifndef __ASM_IRQ_WORK_H +#define __ASM_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} + +#endif /* __ASM_IRQ_WORK_H */ + diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf9422c3aefe..6b47b2ede405 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -42,6 +42,8 @@ void irq_work_run(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK +#include + bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } From 76a33061b9323b7fdb220ae5fa116c10833ec22e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:37:19 +0200 Subject: [PATCH 3/8] irq_work: Force raised irq work to run on irq work interrupt The nohz full kick, which restarts the tick when any resource depend on it, can't be executed anywhere given the operation it does on timers. If it is called from the scheduler or timers code, chances are that we run into a deadlock. This is why we run the nohz full kick from an irq work. That way we make sure that the kick runs on a virgin context. However if that's the case when irq work runs in its own dedicated self-ipi, things are different for the big bunch of archs that don't support the self triggered way. In order to support them, irq works are also handled by the timer interrupt as fallback. Now when irq works run on the timer interrupt, the context isn't blank. More precisely, they can run in the context of the hrtimer that runs the tick. But the nohz kick cancels and restarts this hrtimer and cancelling an hrtimer from itself isn't allowed. This is why we run in an endless loop: Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2 CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34 Workqueue: btrfs-endio-write normal_work_helper [btrfs] ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37 ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010 ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000 Call Trace: ] dump_stack+0x4e/0x7a [] panic+0xd4/0x207 [] watchdog_overflow_callback+0x118/0x120 [] __perf_event_overflow+0xae/0x350 [] ? perf_event_task_disable+0xa0/0xa0 [] ? x86_perf_event_set_period+0xbf/0x150 [] perf_event_overflow+0x14/0x20 [] intel_pmu_handle_irq+0x206/0x410 [] perf_event_nmi_handler+0x2b/0x50 [] nmi_handle+0xd2/0x390 [] ? nmi_handle+0x5/0x390 [] ? match_held_lock+0x8/0x1b0 [] default_do_nmi+0x72/0x1c0 [] do_nmi+0xb8/0x100 [] end_repeat_nmi+0x1e/0x2e [] ? match_held_lock+0x8/0x1b0 [] ? match_held_lock+0x8/0x1b0 [] ? match_held_lock+0x8/0x1b0 <] lock_acquired+0xaf/0x450 [] ? lock_hrtimer_base.isra.20+0x25/0x50 [] _raw_spin_lock_irqsave+0x78/0x90 [] ? lock_hrtimer_base.isra.20+0x25/0x50 [] lock_hrtimer_base.isra.20+0x25/0x50 [] hrtimer_try_to_cancel+0x33/0x1e0 [] hrtimer_cancel+0x1a/0x30 [] tick_nohz_restart+0x17/0x90 [] __tick_nohz_full_check+0xc3/0x100 [] nohz_full_kick_work_func+0xe/0x10 [] irq_work_run_list+0x44/0x70 [] irq_work_run+0x2a/0x50 [] update_process_times+0x5b/0x70 [] tick_sched_handle.isra.21+0x25/0x60 [] tick_sched_timer+0x41/0x60 [] __run_hrtimer+0x72/0x470 [] ? tick_sched_do_timer+0xb0/0xb0 [] hrtimer_interrupt+0x117/0x270 [] local_apic_timer_interrupt+0x37/0x60 [] smp_apic_timer_interrupt+0x3f/0x50 [] apic_timer_interrupt+0x6f/0x80 To fix this we force non-lazy irq works to run on irq work self-IPIs when available. That ability of the arch to trigger irq work self IPIs is available with arch_irq_work_has_interrupt(). Reported-by: Catalin Iacob Reported-by: Dave Jones Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/irq_work.h | 1 + kernel/irq_work.c | 15 +++++++++++++-- kernel/time/timer.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6b47b2ede405..bf3fe719c7ce 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu); #endif void irq_work_run(void); +void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK diff --git a/kernel/irq_work.c b/kernel/irq_work.c index e6bcbe756663..385b85aded19 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void) raised = &__get_cpu_var(raised_list); lazy = &__get_cpu_var(lazy_list); - if (llist_empty(raised) && llist_empty(lazy)) - return false; + + if (llist_empty(raised) || arch_irq_work_has_interrupt()) + if (llist_empty(lazy)) + return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); @@ -171,6 +173,15 @@ void irq_work_run(void) } EXPORT_SYMBOL_GPL(irq_work_run); +void irq_work_tick(void) +{ + struct llist_head *raised = &__get_cpu_var(raised_list); + + if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) + irq_work_run_list(raised); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index aca5dfe2fa3d..9bbb8344ed3b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1385,7 +1385,7 @@ void update_process_times(int user_tick) rcu_check_callbacks(cpu, user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) - irq_work_run(); + irq_work_tick(); #endif scheduler_tick(); run_posix_cpu_timers(p); From 3010279f0fc36f0388872203e63ca49912f648fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:47:15 +0200 Subject: [PATCH 4/8] x86: Tell irq work about self IPI support x86 supports irq work self-IPIs when local apic is available. This is partly known on runtime so lets implement arch_irq_work_has_interrupt() accordingly. This should be safely called after setup_arch(). Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/irq_work.h | 11 +++++++++++ arch/x86/kernel/irq_work.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/irq_work.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 8fa909cc6553..3bf000fab0ae 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,6 +7,5 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h generic-y += early_ioremap.h -generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h new file mode 100644 index 000000000000..78162f8e248b --- /dev/null +++ b/arch/x86/include/asm/irq_work.h @@ -0,0 +1,11 @@ +#ifndef _ASM_IRQ_WORK_H +#define _ASM_IRQ_WORK_H + +#include + +static inline bool arch_irq_work_has_interrupt(void) +{ + return cpu_has_apic; +} + +#endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 1de84e3ab4e0..15d741ddfeeb 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -41,7 +41,7 @@ __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC - if (!cpu_has_apic) + if (!arch_irq_work_has_interrupt()) return; apic->send_IPI_self(IRQ_WORK_VECTOR); From 09f6edd424218eb69078551b2ecfada1f2d098eb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:47:53 +0200 Subject: [PATCH 5/8] arm: Tell irq work about self IPI support ARM irq work IPI support depends on SMP support. That information is partly known at early boottime. Lets implement arch_irq_work_has_interrupt() accordingly. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/irq_work.h | 11 +++++++++++ arch/arm/kernel/smp.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/irq_work.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 202905e7ea0c..70cd84eb7fda 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -11,7 +11,6 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h -generic-y += irq_work.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h diff --git a/arch/arm/include/asm/irq_work.h b/arch/arm/include/asm/irq_work.h new file mode 100644 index 000000000000..712d03e5973a --- /dev/null +++ b/arch/arm/include/asm/irq_work.h @@ -0,0 +1,11 @@ +#ifndef __ASM_ARM_IRQ_WORK_H +#define __ASM_ARM_IRQ_WORK_H + +#include + +static inline bool arch_irq_work_has_interrupt(void) +{ + return is_smp(); +} + +#endif /* _ASM_ARM_IRQ_WORK_H */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 9388a3d479e1..bbe22fcb78f6 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -503,7 +503,7 @@ void arch_send_call_function_single_ipi(int cpu) #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - if (is_smp()) + if (arch_irq_work_has_interrupt()) smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); } #endif From 3631073659d0aafeaa52227bb61a100efaf901dc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:48:05 +0200 Subject: [PATCH 6/8] arm64: Tell irq work about self IPI support ARM64 irq work self-IPI support depends on __smp_cross_call to point to some relevant IRQ controller operations. This information should be available after the call to init_IRQ(). Lets implement arch_irq_work_has_interrupt() accordingly. Acked-by: Peter Zijlstra (Intel) Acked-by: Catalin Marinas Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Frederic Weisbecker --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/irq_work.h | 11 +++++++++++ arch/arm64/include/asm/smp.h | 2 ++ arch/arm64/kernel/smp.c | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/irq_work.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index d617789b1ebd..c1968475cc4e 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -19,7 +19,6 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h -generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h new file mode 100644 index 000000000000..8e24ef3f7c82 --- /dev/null +++ b/arch/arm64/include/asm/irq_work.h @@ -0,0 +1,11 @@ +#ifndef __ASM_IRQ_WORK_H +#define __ASM_IRQ_WORK_H + +#include + +static inline bool arch_irq_work_has_interrupt(void) +{ + return !!__smp_cross_call; +} + +#endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index a498f2cd2c2a..780f82c827b6 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -48,6 +48,8 @@ extern void smp_init_cpus(void); */ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); +extern void (*__smp_cross_call)(const struct cpumask *, unsigned int); + /* * Called from the secondary holding pen, this is the secondary CPU entry point. */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 474339718105..b06d1d90ee8c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -470,7 +470,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -static void (*__smp_cross_call)(const struct cpumask *, unsigned int); +void (*__smp_cross_call)(const struct cpumask *, unsigned int); void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) { From 4327b15f64b2580dad40d2674d50fc44f1b699c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 17 Aug 2014 22:02:55 +0200 Subject: [PATCH 7/8] nohz: Consolidate nohz full init code The supports for CONFIG_NO_HZ_FULL_ALL=y and the nohz_full= kernel parameter both have their own way to do the same thing: allocate full dynticks cpumasks, fill them and initialize some state variables. Lets consolidate that all in the same place. While at it, convert some regular printk message to warnings when fundamental allocations fail. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- kernel/time/tick-sched.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f654a8a298fa..eb4af016ac65 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -295,22 +295,12 @@ void __tick_nohz_task_switch(struct task_struct *tsk) /* Parse the boot-time nohz CPU list from the kernel parameters. */ static int __init tick_nohz_full_setup(char *str) { - int cpu; - alloc_bootmem_cpumask_var(&tick_nohz_full_mask); - alloc_bootmem_cpumask_var(&housekeeping_mask); if (cpulist_parse(str, tick_nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); + free_bootmem_cpumask_var(tick_nohz_full_mask); return 1; } - - cpu = smp_processor_id(); - if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { - pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); - cpumask_clear_cpu(cpu, tick_nohz_full_mask); - } - cpumask_andnot(housekeeping_mask, - cpu_possible_mask, tick_nohz_full_mask); tick_nohz_full_running = true; return 1; @@ -349,18 +339,11 @@ static int tick_nohz_init_all(void) #ifdef CONFIG_NO_HZ_FULL_ALL if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { - pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); - return err; - } - if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { - pr_err("NO_HZ: Can't allocate not-full dynticks cpumask\n"); + WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n"); return err; } err = 0; cpumask_setall(tick_nohz_full_mask); - cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); - cpumask_clear(housekeeping_mask); - cpumask_set_cpu(smp_processor_id(), housekeeping_mask); tick_nohz_full_running = true; #endif return err; @@ -375,6 +358,23 @@ void __init tick_nohz_init(void) return; } + if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { + WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n"); + cpumask_clear(tick_nohz_full_mask); + tick_nohz_full_running = false; + return; + } + + cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, tick_nohz_full_mask); + } + + cpumask_andnot(housekeeping_mask, + cpu_possible_mask, tick_nohz_full_mask); + for_each_cpu(cpu, tick_nohz_full_mask) context_tracking_cpu_set(cpu); From 9b01f5bf3999a3db5b1bbd9fdfd80d8d304e94ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 18 Aug 2014 01:36:07 +0200 Subject: [PATCH 8/8] nohz: nohz full depends on irq work self IPI support The nohz full functionality depends on IRQ work to trigger its own interrupts. As it's used to restart the tick, we can't rely on the tick fallback for irq work callbacks, ie: we can't use the tick to restart the tick itself. Lets reject the full dynticks initialization if that arch support isn't available. As a side effect, this makes sure that nohz kick is never called from the tick. That otherwise would result in illegal hrtimer self-cancellation and lockup. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- kernel/time/tick-sched.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index eb4af016ac65..5a9ff243588c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -365,6 +365,20 @@ void __init tick_nohz_init(void) return; } + /* + * Full dynticks uses irq work to drive the tick rescheduling on safe + * locking contexts. But then we need irq work to raise its own + * interrupts to avoid circular dependency on the tick + */ + if (!arch_irq_work_has_interrupt()) { + pr_warning("NO_HZ: Can't run full dynticks because arch doesn't " + "support irq work self-IPIs\n"); + cpumask_clear(tick_nohz_full_mask); + cpumask_copy(housekeeping_mask, cpu_possible_mask); + tick_nohz_full_running = false; + return; + } + cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {