diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0c5c5f4fce6..fd5c913c33c1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1885,6 +1885,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. arch_perfmon: [X86] Force use of architectural perfmon on Intel CPUs instead of the CPU specific event set. + timer: [X86] Force use of architectural NMI + timer mode (see also oprofile.timer + for generic hr timer mode) oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669cbb3b0..2505740b81d2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config OPROFILE_NMI_TIMER + def_bool y + depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b6..1599f568f0e2 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c54deca..9e138d00ad36 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -16,34 +16,23 @@ * with the NMI mode driver. */ +#ifdef CONFIG_X86_LOCAL_APIC extern int op_nmi_init(struct oprofile_operations *ops); -extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +#else +static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } +static void op_nmi_exit(void) { } +#endif +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations *ops) { - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif -#ifdef CONFIG_X86_IO_APIC - if (ret < 0) - ret = op_nmi_timer_init(ops); -#endif ops->backtrace = x86_backtrace; - - return ret; + return op_nmi_init(ops); } - void oprofile_arch_exit(void) { -#ifdef CONFIG_X86_LOCAL_APIC op_nmi_exit(); -#endif } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 75f9528e0372..26b8a8514ee5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type) return 0; } -static int force_arch_perfmon; -static int force_cpu_type(const char *str, struct kernel_param *kp) +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, + arch_perfmon, +}; + +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) { - if (!strcmp(str, "arch_perfmon")) { - force_arch_perfmon = 1; + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); + } else if (!strcmp(str, "arch_perfmon")) { + force_cpu_type = arch_perfmon; printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } else { + force_cpu_type = 0; } return 0; } -module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) return 0; /* @@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!cpu_has_apic) return -ENODEV; + if (force_cpu_type == timer) + return -ENODEV; + switch (vendor) { case X86_VENDOR_AMD: /* Needs to be at least an Athlon (or hammer in 32bit mode) */ diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 7f8052cd6620..000000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,50 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) -{ - oprofile_add_sample(regs, 0); - return NMI_HANDLED; -} - -static int timer_start(void) -{ - if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, - 0, "oprofile-timer")) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c new file mode 100644 index 000000000000..76f1c9357f39 --- /dev/null +++ b/drivers/oprofile/nmi_timer_int.c @@ -0,0 +1,173 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2011 Advanced Micro Devices, Inc. + * + * @author Robert Richter + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_OPROFILE_NMI_TIMER + +static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); +static int ctr_running; + +static struct perf_event_attr nmi_timer_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +static void nmi_timer_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + event->hw.interrupts = 0; /* don't throttle interrupts */ + oprofile_add_sample(regs, 0); +} + +static int nmi_timer_start_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (!event) { + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, + nmi_timer_callback, NULL); + if (IS_ERR(event)) + return PTR_ERR(event); + per_cpu(nmi_timer_events, cpu) = event; + } + + if (event && ctr_running) + perf_event_enable(event); + + return 0; +} + +static void nmi_timer_stop_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (event && ctr_running) + perf_event_disable(event); +} + +static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + nmi_timer_start_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nmi_timer_stop_cpu(cpu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block nmi_timer_cpu_nb = { + .notifier_call = nmi_timer_cpu_notifier +}; + +static int nmi_timer_start(void) +{ + int cpu; + + get_online_cpus(); + ctr_running = 1; + for_each_online_cpu(cpu) + nmi_timer_start_cpu(cpu); + put_online_cpus(); + + return 0; +} + +static void nmi_timer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + nmi_timer_stop_cpu(cpu); + ctr_running = 0; + put_online_cpus(); +} + +static void nmi_timer_shutdown(void) +{ + struct perf_event *event; + int cpu; + + get_online_cpus(); + unregister_cpu_notifier(&nmi_timer_cpu_nb); + for_each_possible_cpu(cpu) { + event = per_cpu(nmi_timer_events, cpu); + if (!event) + continue; + perf_event_disable(event); + per_cpu(nmi_timer_events, cpu) = NULL; + perf_event_release_kernel(event); + } + + put_online_cpus(); +} + +static int nmi_timer_setup(void) +{ + int cpu, err; + u64 period; + + /* clock cycles per tick: */ + period = (u64)cpu_khz * 1000; + do_div(period, HZ); + nmi_timer_attr.sample_period = period; + + get_online_cpus(); + err = register_cpu_notifier(&nmi_timer_cpu_nb); + if (err) + goto out; + /* can't attach events to offline cpus: */ + for_each_online_cpu(cpu) { + err = nmi_timer_start_cpu(cpu); + if (err) + break; + } + if (err) + nmi_timer_shutdown(); +out: + put_online_cpus(); + return err; +} + +int __init op_nmi_timer_init(struct oprofile_operations *ops) +{ + int err = 0; + + err = nmi_timer_setup(); + if (err) + return err; + nmi_timer_shutdown(); /* only check, don't alloc */ + + ops->create_files = NULL; + ops->setup = nmi_timer_setup; + ops->shutdown = nmi_timer_shutdown; + ops->start = nmi_timer_start; + ops->stop = nmi_timer_stop; + ops->cpu_type = "timer"; + + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + + return 0; +} + +#endif diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index dccd8636095c..ed2c3ec07024 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,26 +239,39 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } +static int timer_mode; + static int __init oprofile_init(void) { int err; + /* always init architecture to setup backtrace support */ + timer_mode = 0; err = oprofile_arch_init(&oprofile_ops); - if (err < 0 || timer) { - printk(KERN_INFO "oprofile: using timer interrupt.\n"); + if (!err) { + if (!timer && !oprofilefs_register()) + return 0; + oprofile_arch_exit(); + } + + /* setup timer mode: */ + timer_mode = 1; + /* no nmi timer mode if oprofile.timer is set */ + if (timer || op_nmi_timer_init(&oprofile_ops)) { err = oprofile_timer_init(&oprofile_ops); if (err) return err; } + return oprofilefs_register(); } static void __exit oprofile_exit(void) { - oprofile_timer_exit(); oprofilefs_unregister(); - oprofile_arch_exit(); + if (!timer_mode) + oprofile_arch_exit(); } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 177b73de5e5f..769fb0fcac44 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -36,6 +36,15 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); +#ifdef CONFIG_OPROFILE_NMI_TIMER +int op_nmi_timer_init(struct oprofile_operations *ops); +#else +static inline int op_nmi_timer_init(struct oprofile_operations *ops) +{ + return -ENODEV; +} +#endif + int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 3ef44624f510..93404f72dfa8 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,23 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int oprofile_timer_init(struct oprofile_operations *ops) +static int oprofile_hrtimer_setup(void) { - int rc; - - rc = register_hotcpu_notifier(&oprofile_cpu_notifier); - if (rc) - return rc; - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = oprofile_hrtimer_start; - ops->stop = oprofile_hrtimer_stop; - ops->cpu_type = "timer"; - return 0; + return register_hotcpu_notifier(&oprofile_cpu_notifier); } -void oprofile_timer_exit(void) +static void oprofile_hrtimer_shutdown(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } + +int oprofile_timer_init(struct oprofile_operations *ops) +{ + ops->create_files = NULL; + ops->setup = oprofile_hrtimer_setup; + ops->shutdown = oprofile_hrtimer_shutdown; + ops->start = oprofile_hrtimer_start; + ops->stop = oprofile_hrtimer_stop; + ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); + return 0; +} diff --git a/kernel/events/core.c b/kernel/events/core.c index 8d9dea56c262..924338bb489c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1322,6 +1322,7 @@ void perf_event_disable(struct perf_event *event) } raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, @@ -1806,6 +1807,7 @@ void perf_event_enable(struct perf_event *event) out: raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_enable); int perf_event_refresh(struct perf_event *event, int refresh) {