linux-stable/include/linux/kernel_stat.h
Thomas Gleixner c291ee6221 genirq: Prevent proc race against freeing of irq descriptors
Since the rework of the sparse interrupt code to actually free the
unused interrupt descriptors there exists a race between the /proc
interfaces to the irq subsystem and the code which frees the interrupt
descriptor.

CPU0				CPU1
				show_interrupts()
				  desc = irq_to_desc(X);
free_desc(desc)
  remove_from_radix_tree();
  kfree(desc);
				  raw_spinlock_irq(&desc->lock);

/proc/interrupts is the only interface which can actively corrupt
kernel memory via the lock access. /proc/stat can only read from freed
memory. Extremly hard to trigger, but possible.

The interfaces in /proc/irq/N/ are not affected by this because the
removal of the proc file is serialized in procfs against concurrent
readers/writers. The removal happens before the descriptor is freed.

For architectures which have CONFIG_SPARSE_IRQ=n this is a non issue
as the descriptor is never freed. It's merely cleared out with the irq
descriptor lock held. So any concurrent proc access will either see
the old correct value or the cleared out ones.

Protect the lookup and access to the irq descriptor in
show_interrupts() with the sparse_irq_lock.

Provide kstat_irqs_usr() which is protecting the lookup and access
with sparse_irq_lock and switch /proc/stat to use it.

Document the existing kstat_irqs interfaces so it's clear that the
caller needs to take care about protection. The users of these
interfaces are either not affected due to SPARSE_IRQ=n or already
protected against removal.

Fixes: 1f5a5b87f7 "genirq: Implement a sane sparse_irq allocator"
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
2014-12-13 13:33:07 +01:00

98 lines
2.4 KiB
C

#ifndef _LINUX_KERNEL_STAT_H
#define _LINUX_KERNEL_STAT_H
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/vtime.h>
#include <asm/irq.h>
#include <linux/cputime.h>
/*
* 'kernel_stat.h' contains the definitions needed for doing
* some kernel statistics (CPU usage, context switches ...),
* used by rstatd/perfmeter
*/
enum cpu_usage_stat {
CPUTIME_USER,
CPUTIME_NICE,
CPUTIME_SYSTEM,
CPUTIME_SOFTIRQ,
CPUTIME_IRQ,
CPUTIME_IDLE,
CPUTIME_IOWAIT,
CPUTIME_STEAL,
CPUTIME_GUEST,
CPUTIME_GUEST_NICE,
NR_STATS,
};
struct kernel_cpustat {
u64 cpustat[NR_STATS];
};
struct kernel_stat {
unsigned long irqs_sum;
unsigned int softirqs[NR_SOFTIRQS];
};
DECLARE_PER_CPU(struct kernel_stat, kstat);
DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
/* Must have preemption disabled for this to be meaningful. */
#define kstat_this_cpu this_cpu_ptr(&kstat)
#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat)
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
extern unsigned long long nr_context_switches(void);
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
extern void kstat_incr_irq_this_cpu(unsigned int irq);
static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
{
__this_cpu_inc(kstat.softirqs[irq]);
}
static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
{
return kstat_cpu(cpu).softirqs[irq];
}
/*
* Number of interrupts per specific IRQ source, since bootup
*/
extern unsigned int kstat_irqs(unsigned int irq);
extern unsigned int kstat_irqs_usr(unsigned int irq);
/*
* Number of interrupts per cpu, since bootup
*/
static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
{
return kstat_cpu(cpu).irqs_sum;
}
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
}
#else
extern void account_process_tick(struct task_struct *, int user);
#endif
extern void account_steal_ticks(unsigned long ticks);
extern void account_idle_ticks(unsigned long ticks);
#endif /* _LINUX_KERNEL_STAT_H */