From aa27497c2fb4c7f57706099bd489e683e5cc3e3b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 5 Apr 2010 17:11:05 +0800 Subject: [PATCH 01/15] tracing: Fix uninitialized variable of tracing/trace output Because a local variable is not initialized, I got these when I did 'cat tracing/trace'. (not trace_pipe): CPU:0 [LOST 18446744071579453134 EVENTS] ps-3099 [000] 560.770221: lock_acquire: ffff880030865010 &(&dentry->d_lock)->rlock CPU:0 [LOST 18446744071579453134 EVENTS] ps-3099 [000] 560.770221: lock_release: ffff880030865010 &(&dentry->d_lock)->rlock CPU:0 [LOST 18446612133255294080 EVENTS] ps-3099 [000] 560.770221: lock_acquire: ffff880030865010 &(&dentry->d_lock)->rlock CPU:0 [LOST 18446744071579453134 EVENTS] ps-3099 [000] 560.770222: lock_release: ffff880030865010 &(&dentry->d_lock)->rlock CPU:0 [LOST 18446744071579453134 EVENTS] ps-3099 [000] 560.770222: lock_release: ffffffff816cfb98 dcache_lock See peek_next_entry(), it does not set *lost_events when we 'cat tracing/trace' Signed-off-by: Lai Jiangshan LKML-Reference: <4BB9A929.2000303@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0498bebcbfd1..b9be232352b8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1571,7 +1571,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; - unsigned long lost_events, next_lost = 0; + unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; From cecbca96da387428e220e307a9c945e37e2f4d9e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Apr 2010 19:08:41 +0200 Subject: [PATCH 02/15] tracing: Dump either the oops's cpu source or all cpus buffers The ftrace_dump_on_oops kernel parameter, sysctl and sysrq let one dump every cpu buffers when an oops or panic happens. It's nice when you have few cpus but it may take ages if have many, plus you miss the real origin of the problem in all the cpu traces. Sometimes, all you need is to dump the cpu buffer that triggered the opps, most of the time it is our main interest. This patch modifies ftrace_dump_on_oops to handle this choice. The ftrace_dump_on_oops kernel parameter, when it comes alone, has the same behaviour than before. But ftrace_dump_on_oops=orig_cpu will only dump the buffer of the cpu that oops'ed. Similarly, sysctl kernel.ftrace_dump_on_oops=1 and echo 1 > /proc/sys/kernel/ftrace_dump_on_oops keep their previous behaviour. But setting 2 jumps into cpu origin dump mode. v2: Fix double setup v3: Fix spelling issues reported by Randy Dunlap v4: Also update __ftrace_dump in the selftests Signed-off-by: Frederic Weisbecker Acked-by: David S. Miller Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Li Zefan Cc: Lai Jiangshan --- Documentation/kernel-parameters.txt | 6 +++- Documentation/trace/ftrace.txt | 6 ++-- drivers/char/sysrq.c | 2 +- include/linux/ftrace.h | 4 ++- include/linux/kernel.h | 11 +++++-- kernel/trace/trace.c | 51 ++++++++++++++++++++++------- kernel/trace/trace_selftest.c | 5 +-- 7 files changed, 64 insertions(+), 21 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e4cbca58536c..ab67b33300fb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -789,8 +789,12 @@ and is between 256 and 4096 characters. It is defined in the file as early as possible in order to facilitate early boot debugging. - ftrace_dump_on_oops + ftrace_dump_on_oops[=orig_cpu] [FTRACE] will dump the trace buffers on oops. + If no parameter is passed, ftrace will dump + buffers of all CPUs, but if you pass orig_cpu, it will + dump only the buffer of the CPU that triggered the + oops. ftrace_filter=[function-list] [FTRACE] Limit the functions traced by the function diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 03485bfbd797..52011815c905 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1337,12 +1337,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one can either use the sysctl function or set it via the proc system interface. - sysctl kernel.ftrace_dump_on_oops=1 + sysctl kernel.ftrace_dump_on_oops=n or - echo 1 > /proc/sys/kernel/ftrace_dump_on_oops + echo n > /proc/sys/kernel/ftrace_dump_on_oops +If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will +only dump the buffer of the CPU that triggered the oops. Here's an example of such a dump after a null pointer dereference in a kernel module: diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 59de2525d303..d4e8b213a462 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -289,7 +289,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { static void sysrq_ftrace_dump(int key, struct tty_struct *tty) { - ftrace_dump(); + ftrace_dump(DUMP_ALL); } static struct sysrq_key_op sysrq_ftrace_dump_op = { .handler = sysrq_ftrace_dump, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 01e6adea07ec..ea5b1aae0e8b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -492,7 +492,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } -extern int ftrace_dump_on_oops; +enum ftrace_dump_mode; + +extern enum ftrace_dump_mode ftrace_dump_on_oops; #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9365227dbaf6..9fb1c1299032 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -490,6 +490,13 @@ static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } static inline int tracing_is_on(void) { return 0; } #endif + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, +}; + #ifdef CONFIG_TRACING extern void tracing_start(void); extern void tracing_stop(void); @@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); -extern void ftrace_dump(void); +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } @@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap) { return 0; } -static inline void ftrace_dump(void) { } +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bed83cab6da2..7b516c7ef9a0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -117,9 +117,12 @@ static cpumask_var_t __read_mostly tracing_buffer_mask; * * It is default off, but you can enable it with either specifying * "ftrace_dump_on_oops" in the kernel command line, or setting - * /proc/sys/kernel/ftrace_dump_on_oops to true. + * /proc/sys/kernel/ftrace_dump_on_oops + * Set 1 if you want to dump buffers of all CPUs + * Set 2 if you want to dump the buffer of the CPU that triggered oops */ -int ftrace_dump_on_oops; + +enum ftrace_dump_mode ftrace_dump_on_oops; static int tracing_set_tracer(const char *buf); @@ -139,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { - ftrace_dump_on_oops = 1; - return 1; + if (*str++ != '=' || !*str) { + ftrace_dump_on_oops = DUMP_ALL; + return 1; + } + + if (!strcmp("orig_cpu", str)) { + ftrace_dump_on_oops = DUMP_ORIG; + return 1; + } + + return 0; } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); @@ -4338,7 +4350,7 @@ static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); return NOTIFY_OK; } @@ -4355,7 +4367,7 @@ static int trace_die_handler(struct notifier_block *self, switch (val) { case DIE_OOPS: if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); break; default: break; @@ -4396,7 +4408,8 @@ trace_printk_seq(struct trace_seq *s) trace_seq_init(s); } -static void __ftrace_dump(bool disable_tracing) +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) { static arch_spinlock_t ftrace_dump_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -4429,12 +4442,25 @@ static void __ftrace_dump(bool disable_tracing) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - printk(KERN_TRACE "Dumping ftrace buffer:\n"); - /* Simulate the iterator */ iter.tr = &global_trace; iter.trace = current_trace; - iter.cpu_file = TRACE_PIPE_ALL_CPU; + + switch (oops_dump_mode) { + case DUMP_ALL: + iter.cpu_file = TRACE_PIPE_ALL_CPU; + break; + case DUMP_ORIG: + iter.cpu_file = raw_smp_processor_id(); + break; + case DUMP_NONE: + goto out_enable; + default: + printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); + iter.cpu_file = TRACE_PIPE_ALL_CPU; + } + + printk(KERN_TRACE "Dumping ftrace buffer:\n"); /* * We need to stop all tracing on all CPUS to read the @@ -4473,6 +4499,7 @@ static void __ftrace_dump(bool disable_tracing) else printk(KERN_TRACE "---------------------------------\n"); + out_enable: /* Re-enable tracing if requested */ if (!disable_tracing) { trace_flags |= old_userobj; @@ -4489,9 +4516,9 @@ static void __ftrace_dump(bool disable_tracing) } /* By default: disable tracing after the dump */ -void ftrace_dump(void) +void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { - __ftrace_dump(true); + __ftrace_dump(true, oops_dump_mode); } __init static int tracer_alloc_buffers(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9398034f814a..6a9d36ddfcf2 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -256,7 +256,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* Maximum number of functions to trace before diagnosing a hang */ #define GRAPH_MAX_FUNC_TEST 100000000 -static void __ftrace_dump(bool disable_tracing); +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ @@ -267,7 +268,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) ftrace_graph_stop(); printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); if (ftrace_dump_on_oops) - __ftrace_dump(false); + __ftrace_dump(false, DUMP_ALL); return 0; } From 9106b69382912ddc403a307b69bf894a6f3004e4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 2 Apr 2010 19:01:20 +0200 Subject: [PATCH 03/15] tracing: Add ftrace events for graph tracer Add ftrace events for graph tracer, so the graph output could be shared with other tracers. Signed-off-by: Jiri Olsa LKML-Reference: <1270227683-14631-2-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 669b9c31861d..db9e06bb766e 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -40,7 +40,7 @@ struct fgraph_data { #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 -#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -1096,6 +1096,12 @@ print_graph_function(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static enum print_line_t +print_graph_function_event(struct trace_iterator *iter, int flags) +{ + return print_graph_function(iter); +} + static void print_lat_header(struct seq_file *s) { static const char spaces[] = " " /* 16 spaces */ @@ -1199,6 +1205,16 @@ static void graph_trace_close(struct trace_iterator *iter) } } +static struct trace_event graph_trace_entry_event = { + .type = TRACE_GRAPH_ENT, + .trace = print_graph_function_event, +}; + +static struct trace_event graph_trace_ret_event = { + .type = TRACE_GRAPH_RET, + .trace = print_graph_function_event, +}; + static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, @@ -1220,6 +1236,16 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); + if (!register_ftrace_event(&graph_trace_entry_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + + if (!register_ftrace_event(&graph_trace_ret_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + return register_tracer(&graph_trace); } From d7a8d9e907cc294ec7a4a7046d1886375fbcc82e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 2 Apr 2010 19:01:21 +0200 Subject: [PATCH 04/15] tracing: Have graph flags passed in to ouput functions Let the function graph tracer have custom flags passed to its output functions. Signed-off-by: Jiri Olsa LKML-Reference: <1270227683-14631-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 +- kernel/trace/trace_functions_graph.c | 123 +++++++++++++++------------ 2 files changed, 73 insertions(+), 56 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2825ef2c0b15..970004c5fa79 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -491,7 +491,9 @@ extern int trace_clock_id; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -extern enum print_line_t print_graph_function(struct trace_iterator *iter); +extern enum print_line_t +print_graph_function_flags(struct trace_iterator *iter, u32 flags); +extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern enum print_line_t trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); @@ -524,7 +526,7 @@ static inline int ftrace_graph_addr(unsigned long addr) #endif /* CONFIG_DYNAMIC_FTRACE */ #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index db9e06bb766e..de5f6518aba0 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -527,17 +527,18 @@ get_return_for_leaf(struct trace_iterator *iter, /* Signal a overhead of time execution to the output */ static int -print_graph_overhead(unsigned long long duration, struct trace_seq *s) +print_graph_overhead(unsigned long long duration, struct trace_seq *s, + u32 flags) { /* If duration disappear, we don't need anything */ - if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) + if (!(flags & TRACE_GRAPH_PRINT_DURATION)) return 1; /* Non nested entry or return */ if (duration == -1) return trace_seq_printf(s, " "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { /* Duration exceeded 100 msecs */ if (duration > 100000ULL) return trace_seq_printf(s, "! "); @@ -563,7 +564,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s) static enum print_line_t print_graph_irq(struct trace_iterator *iter, unsigned long addr, - enum trace_type type, int cpu, pid_t pid) + enum trace_type type, int cpu, pid_t pid, u32 flags) { int ret; struct trace_seq *s = &iter->seq; @@ -573,21 +574,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_UNHANDLED; /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -597,7 +598,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -610,7 +611,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_PARTIAL_LINE; /* Don't close the duration column if haven't one */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) trace_seq_printf(s, " |"); ret = trace_seq_printf(s, "\n"); @@ -680,7 +681,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) static enum print_line_t print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) + struct ftrace_graph_ret_entry *ret_entry, + struct trace_seq *s, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ret *graph_ret; @@ -712,12 +714,12 @@ print_graph_entry_leaf(struct trace_iterator *iter, } /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -740,7 +742,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, static enum print_line_t print_graph_entry_nested(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct trace_seq *s, int cpu) + struct trace_seq *s, int cpu, u32 flags) { struct ftrace_graph_ent *call = &entry->graph_ent; struct fgraph_data *data = iter->private; @@ -760,12 +762,12 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -791,7 +793,7 @@ print_graph_entry_nested(struct trace_iterator *iter, static enum print_line_t print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, - int type, unsigned long addr) + int type, unsigned long addr, u32 flags) { struct fgraph_data *data = iter->private; struct trace_entry *ent = iter->ent; @@ -804,27 +806,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, if (type) { /* Interrupt */ - ret = print_graph_irq(iter, addr, type, cpu, ent->pid); + ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -846,7 +848,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, static enum print_line_t print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, - struct trace_iterator *iter) + struct trace_iterator *iter, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ent *call = &field->graph_ent; @@ -854,14 +856,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t ret; int cpu = iter->cpu; - if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) + if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) return TRACE_TYPE_PARTIAL_LINE; leaf_ret = get_return_for_leaf(iter, field); if (leaf_ret) - ret = print_graph_entry_leaf(iter, field, leaf_ret, s); + ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags); else - ret = print_graph_entry_nested(iter, field, s, cpu); + ret = print_graph_entry_nested(iter, field, s, cpu, flags); if (data) { /* @@ -880,7 +882,8 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, - struct trace_entry *ent, struct trace_iterator *iter) + struct trace_entry *ent, struct trace_iterator *iter, + u32 flags) { unsigned long long duration = trace->rettime - trace->calltime; struct fgraph_data *data = iter->private; @@ -910,16 +913,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } } - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -949,14 +952,15 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } /* Overrun */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + if (flags & TRACE_GRAPH_PRINT_OVERRUN) { ret = trace_seq_printf(s, " (Overruns: %lu)\n", trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } - ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); + ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, + cpu, pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -964,8 +968,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } static enum print_line_t -print_graph_comment(struct trace_seq *s, struct trace_entry *ent, - struct trace_iterator *iter) +print_graph_comment(struct trace_seq *s, struct trace_entry *ent, + struct trace_iterator *iter, u32 flags) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct fgraph_data *data = iter->private; @@ -977,16 +981,16 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (data) depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1041,7 +1045,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { struct ftrace_graph_ent_entry *field; struct fgraph_data *data = iter->private; @@ -1062,7 +1066,7 @@ print_graph_function(struct trace_iterator *iter) if (data && data->failed) { field = &data->ent; iter->cpu = data->cpu; - ret = print_graph_entry(field, s, iter); + ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; ret = TRACE_TYPE_NO_CONSUME; @@ -1082,38 +1086,44 @@ print_graph_function(struct trace_iterator *iter) struct ftrace_graph_ent_entry saved; trace_assign_type(field, entry); saved = *field; - return print_graph_entry(&saved, s, iter); + return print_graph_entry(&saved, s, iter, flags); } case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); - return print_graph_return(&field->ret, s, entry, iter); + return print_graph_return(&field->ret, s, entry, iter, flags); } default: - return print_graph_comment(s, entry, iter); + return print_graph_comment(s, entry, iter, flags); } return TRACE_TYPE_HANDLED; } +static enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + return print_graph_function_flags(iter, tracer_flags.val); +} + static enum print_line_t print_graph_function_event(struct trace_iterator *iter, int flags) { return print_graph_function(iter); } -static void print_lat_header(struct seq_file *s) +static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ " " /* 4 spaces */ " "; /* 17 spaces */ int size = 0; - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) size += 16; - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) size += 4; - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) size += 17; seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); @@ -1124,42 +1134,47 @@ static void print_lat_header(struct seq_file *s) seq_printf(s, "#%.*s|||| / \n", size, spaces); } -static void print_graph_headers(struct seq_file *s) +void print_graph_headers_flags(struct seq_file *s, u32 flags) { int lat = trace_flags & TRACE_ITER_LATENCY_FMT; if (lat) - print_lat_header(s); + print_lat_header(s, flags); /* 1st line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " TIME "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " CPU"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " TASK/PID "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); /* 2nd line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " | | "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); } +static void print_graph_headers(struct seq_file *s) +{ + print_graph_headers_flags(s, tracer_flags.val); +} + static void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ From 62b915f1060996a8e1f69be50e3b8e9e43b710cb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 2 Apr 2010 19:01:22 +0200 Subject: [PATCH 05/15] tracing: Add graph output support for irqsoff tracer Add function graph output to irqsoff tracer. The graph output is enabled by setting new 'display-graph' trace option. Signed-off-by: Jiri Olsa LKML-Reference: <1270227683-14631-4-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 15 +- kernel/trace/trace.c | 35 ++-- kernel/trace/trace.h | 21 +++ kernel/trace/trace_functions_graph.c | 15 +- kernel/trace/trace_irqsoff.c | 271 +++++++++++++++++++++++++-- 5 files changed, 324 insertions(+), 33 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ea5b1aae0e8b..8415a522f430 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -352,6 +352,10 @@ struct ftrace_graph_ret { int depth; }; +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* for init task */ @@ -400,10 +404,6 @@ extern char __irqentry_text_end[]; #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ - extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); @@ -441,6 +441,13 @@ static inline void unpause_graph_tracing(void) static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } +static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) +{ + return -1; +} +static inline void unregister_ftrace_graph(void) { } + static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7b516c7ef9a0..8b9ba41ec146 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1808,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m) } -static void +void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -2017,7 +2017,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; } -static int trace_empty(struct trace_iterator *iter) +int trace_empty(struct trace_iterator *iter) { int cpu; @@ -2084,6 +2084,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_trace_fmt(iter); } +void trace_default_header(struct seq_file *m) +{ + struct trace_iterator *iter = m->private; + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2096,17 +2113,9 @@ static int s_show(struct seq_file *m, void *v) } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); - else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { - /* print nothing if the buffers are empty */ - if (trace_empty(iter)) - return 0; - print_trace_header(m, iter); - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_lat_help_header(m); - } else { - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_func_help_header(m); - } + else + trace_default_header(m); + } else if (iter->leftover) { /* * If we filled the seq_file buffer earlier, we diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 970004c5fa79..911e9864e94a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -378,6 +378,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void trace_default_header(struct seq_file *m); +void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); @@ -491,11 +494,29 @@ extern int trace_clock_id; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* Flag options */ +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +#define TRACE_GRAPH_PRINT_CPU 0x2 +#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 +#define TRACE_GRAPH_PRINT_DURATION 0x10 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 + extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern enum print_line_t trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); +extern void graph_trace_open(struct trace_iterator *iter); +extern void graph_trace_close(struct trace_iterator *iter); +extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, int pc); +extern void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, + unsigned long flags, int pc); + #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index de5f6518aba0..dd11c830eb84 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -179,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } -static int __trace_graph_entry(struct trace_array *tr, +int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -246,7 +246,7 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) return trace_graph_entry(trace); } -static void __trace_graph_return(struct trace_array *tr, +void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -1093,6 +1093,11 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) trace_assign_type(field, entry); return print_graph_return(&field->ret, s, entry, iter, flags); } + case TRACE_STACK: + case TRACE_FN: + /* dont trace stack and functions as comments */ + return TRACE_TYPE_UNHANDLED; + default: return print_graph_comment(s, entry, iter, flags); } @@ -1170,12 +1175,12 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) seq_printf(s, " | | | |\n"); } -static void print_graph_headers(struct seq_file *s) +void print_graph_headers(struct seq_file *s) { print_graph_headers_flags(s, tracer_flags.val); } -static void graph_trace_open(struct trace_iterator *iter) +void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; @@ -1210,7 +1215,7 @@ static void graph_trace_open(struct trace_iterator *iter) pr_warning("function graph tracer: not enough memory\n"); } -static void graph_trace_close(struct trace_iterator *iter) +void graph_trace_close(struct trace_iterator *iter) { struct fgraph_data *data = iter->private; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2974bc7538c7..6fd486e0cef4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -34,6 +34,9 @@ static int trace_type __read_mostly; static int save_lat_flag; +static void stop_irqsoff_tracer(struct trace_array *tr, int graph); +static int start_irqsoff_tracer(struct trace_array *tr, int graph); + #ifdef CONFIG_PREEMPT_TRACER static inline int preempt_trace(void) @@ -55,6 +58,23 @@ irq_trace(void) # define irq_trace() (0) #endif +#define TRACE_DISPLAY_GRAPH 1 + +static struct tracer_opt trace_opts[] = { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* display latency trace as call graph */ + { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +#endif + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, + .opts = trace_opts, +}; + +#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly = }; #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + int cpu; + + if (!(bit & TRACE_DISPLAY_GRAPH)) + return -EINVAL; + + if (!(is_graph() ^ set)) + return 0; + + stop_irqsoff_tracer(irqsoff_trace, !set); + + for_each_possible_cpu(cpu) + per_cpu(tracing_cpu, cpu) = 0; + + tracing_max_latency = 0; + tracing_reset_online_cpus(irqsoff_trace); + + return start_irqsoff_tracer(irqsoff_trace, set); +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int ret; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return 0; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return 0; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else + ret = 0; + + atomic_dec(&data->disabled); + return ret; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, trace, flags, pc); + } + + atomic_dec(&data->disabled); +} + +static void irqsoff_trace_open(struct trace_iterator *iter) +{ + if (is_graph()) + graph_trace_open(iter); + +} + +static void irqsoff_trace_close(struct trace_iterator *iter) +{ + if (iter->private) + graph_trace_close(iter); +} + +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ + TRACE_GRAPH_PRINT_PROC) + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) + flags |= TRACE_GRAPH_PRINT_DURATION; + else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + /* + * In graph mode call the graph tracer output function, + * otherwise go with the TRACE_FN event handler + */ + if (is_graph()) + return print_graph_function_flags(iter, flags); + + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_print_header(struct seq_file *s) +{ + if (is_graph()) { + struct trace_iterator *iter = s->private; + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + + print_trace_header(s, iter); + flags |= TRACE_GRAPH_PRINT_DURATION; + } else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + print_graph_headers_flags(s, flags); + } else + trace_default_header(s); +} + +static void +trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long flags, int pc) +{ + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { + .func = ip, + .depth = 0, + }; + struct ftrace_graph_ret ret = { + .func = ip, + .depth = 0, + .calltime = time, + .rettime = time, + }; + + __trace_graph_entry(tr, &ent, flags, pc); + __trace_graph_return(tr, &ret, flags, pc); +} + +static void +__trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + unsigned long flags, int pc) +{ + if (!is_graph()) + trace_function(tr, ip, parent_ip, flags, pc); + else { + trace_graph_function(tr, parent_ip, flags, pc); + trace_graph_function(tr, ip, flags, pc); + } +} + +#else +#define __trace_function trace_function + +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + return -EINVAL; +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + return -1; +} + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } +static void irqsoff_print_header(struct seq_file *s) { } +static void irqsoff_trace_open(struct trace_iterator *iter) { } +static void irqsoff_trace_close(struct trace_iterator *iter) { } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* * Should this new latency be reported/recorded? */ @@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); /* Skip 5 functions to get to the irq/preempt enable function */ __trace_stack(tr, flags, 5, pc); @@ -172,7 +388,7 @@ check_critical_timing(struct trace_array *tr, out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static void start_irqsoff_tracer(struct trace_array *tr) +static int start_irqsoff_tracer(struct trace_array *tr, int graph) { - register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) + int ret = 0; + + if (!graph) + ret = register_ftrace_function(&trace_ops); + else + ret = register_ftrace_graph(&irqsoff_graph_return, + &irqsoff_graph_entry); + + if (!ret && tracing_is_enabled()) tracer_enabled = 1; else tracer_enabled = 0; + + return ret; } -static void stop_irqsoff_tracer(struct trace_array *tr) +static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_ftrace_function(&trace_ops); + + if (!graph) + unregister_ftrace_function(&trace_ops); + else + unregister_ftrace_graph(); } static void __irqsoff_tracer_init(struct trace_array *tr) @@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr) /* make sure that the tracer is visible */ smp_wmb(); tracing_reset_online_cpus(tr); - start_irqsoff_tracer(tr); + + if (start_irqsoff_tracer(tr, is_graph())) + printk(KERN_ERR "failed to start irqsoff tracer\n"); } static void irqsoff_tracer_reset(struct trace_array *tr) { - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr, is_graph()); if (!save_lat_flag) trace_flags &= ~TRACE_ITER_LATENCY_FMT; @@ -409,9 +640,15 @@ static struct tracer irqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -435,9 +672,15 @@ static struct tracer preemptoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -463,9 +706,15 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptirqsoff(trace) register_tracer(&trace) From 72c9ddfd4c5bf54ef03cfdf57026416cb678eeba Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 20 Apr 2010 15:47:11 -0700 Subject: [PATCH 06/15] ring-buffer: Make non-consuming read less expensive with lots of cpus. When performing a non-consuming read, a synchronize_sched() is performed once for every cpu which is actively tracing. This is very expensive, and can make it take several seconds to open up the 'trace' file with lots of cpus. Only one synchronize_sched() call is actually necessary. What is desired is for all cpus to see the disabling state change. So we transform the existing sequence: for_each_cpu() { ring_buffer_read_start(); } where each ring_buffer_start() call performs a synchronize_sched(), into the following: for_each_cpu() { ring_buffer_read_prepare(); } ring_buffer_read_prepare_sync(); for_each_cpu() { ring_buffer_read_start(); } wherein only the single ring_buffer_read_prepare_sync() call needs to do the synchronize_sched(). The first phase, via ring_buffer_read_prepare(), allocates the 'iter' memory and increments ->record_disabled. In the second phase, ring_buffer_read_prepare_sync() makes sure this ->record_disabled state is visible fully to all cpus. And in the final third phase, the ring_buffer_read_start() calls reset the 'iter' objects allocated in the first phase since we now know that none of the cpus are adding trace entries any more. This makes openning the 'trace' file nearly instantaneous on a sparc64 Niagara2 box with 128 cpus tracing. Signed-off-by: David S. Miller LKML-Reference: <20100420.154711.11246950.davem@davemloft.net> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 4 ++- kernel/trace/ring_buffer.c | 64 +++++++++++++++++++++++++++++++------ kernel/trace/trace.c | 11 +++++-- 3 files changed, 65 insertions(+), 14 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c8297761e414..25b4f686d918 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_prepare_sync(void); +void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5885cdfc41f3..2a090448ef6b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3332,23 +3332,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, EXPORT_SYMBOL_GPL(ring_buffer_consume); /** - * ring_buffer_read_start - start a non consuming read of the buffer + * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * - * This starts up an iteration through the buffer. It also disables - * the recording to the buffer until the reading is finished. - * This prevents the reading from being corrupted. This is not - * a consuming read, so a producer is not expected. + * This performs the initial preparations necessary to iterate + * through the buffer. Memory is allocated, buffer recording + * is disabled, and the iterator pointer is returned to the caller. * - * Must be paired with ring_buffer_finish. + * Disabling buffer recordng prevents the reading from being + * corrupted. This is not a consuming read, so a producer is not + * expected. + * + * After a sequence of ring_buffer_read_prepare calls, the user is + * expected to make at least one call to ring_buffer_prepare_sync. + * Afterwards, ring_buffer_read_start is invoked to get things going + * for real. + * + * This overall must be paired with ring_buffer_finish. */ struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu) +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; - unsigned long flags; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; @@ -3362,15 +3369,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) iter->cpu_buffer = cpu_buffer; atomic_inc(&cpu_buffer->record_disabled); + + return iter; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); + +/** + * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls + * + * All previously invoked ring_buffer_read_prepare calls to prepare + * iterators will be synchronized. Afterwards, read_buffer_read_start + * calls on those iterators are allowed. + */ +void +ring_buffer_read_prepare_sync(void) +{ synchronize_sched(); +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); + +/** + * ring_buffer_read_start - start a non consuming read of the buffer + * @iter: The iterator returned by ring_buffer_read_prepare + * + * This finalizes the startup of an iteration through the buffer. + * The iterator comes from a call to ring_buffer_read_prepare and + * an intervening ring_buffer_read_prepare_sync must have been + * performed. + * + * Must be paired with ring_buffer_finish. + */ +void +ring_buffer_read_start(struct ring_buffer_iter *iter) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; + + if (!iter) + return; + + cpu_buffer = iter->cpu_buffer; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return iter; } EXPORT_SYMBOL_GPL(ring_buffer_read_start); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b9ba41ec146..756d7283318b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2201,15 +2201,20 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { - iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + } + ring_buffer_read_prepare_sync(); + for_each_tracing_cpu(cpu) { + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare_sync(); + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } From a838b2e634405fb89ddbf4fa9412acb33911911f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 Apr 2010 13:26:58 -0400 Subject: [PATCH 07/15] ring-buffer: Make benchmark handle missed events With the addition of the "missed events" flags that is stored in the commit field of the ring buffer page, the ring_buffer_benchmark was not updated to handle this. If events are missed, then the missed events flag is set in the ring buffer page, the benchmark will count that flag as part of the size of the page and will hit the BUG() when it tries to read beyond the page. The solution is simply to have the ring buffer benchmark mask off the extra bits. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer_benchmark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index dc56556b55a2..302f8a614635 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -113,7 +113,8 @@ static enum event_status read_page(int cpu) ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); if (ret >= 0) { rpage = bpage; - commit = local_read(&rpage->commit); + /* The commit may have missed event flags set, clear them */ + commit = local_read(&rpage->commit) & 0xfffff; for (i = 0; i < commit && !kill_test; i += inc) { if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { From 07271aa42d13378e67ebd79ea9ca1c4a5e2ad46f Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Fri, 23 Apr 2010 14:02:39 -0400 Subject: [PATCH 08/15] tracing: Add documentation for trace commands mod, traceon/traceoff The mod command went in as commit 64e7c440618998fd69eee6ab490b042d12248021 The traceon/traceoff commands went in as commit 23b4ff3aa479c9e3bb23cb6b2d0a97878399784a Signed-off-by: Chase Douglas LKML-Reference: <1272045759-32018-1-git-send-email-chase.douglas@canonical.com> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 52011815c905..557c1edeccaf 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -155,6 +155,9 @@ of ftrace. Here is a list of some of the key files: to be traced. Echoing names of functions into this file will limit the trace to only those functions. + This interface also allows for commands to be used. See the + "Filter commands" section for more details. + set_ftrace_notrace: This has an effect opposite to that of @@ -1824,6 +1827,47 @@ this special filter via: echo > set_graph_function +Filter commands +--------------- + +A few commands are supported by the set_ftrace_filter interface. +Trace commands have the following format: + +:: + +The following commands are supported: + +- mod + This command enables function filtering per module. The + parameter defines the module. For example, if only the write* + functions in the ext3 module are desired, run: + + echo 'write*:mod:ext3' > set_ftrace_filter + + This command interacts with the filter in the same way as + filtering based on function names. Thus, adding more functions + in a different module is accomplished by appending (>>) to the + filter file. Remove specific module functions by prepending + '!': + + echo '!writeback*:mod:ext3' >> set_ftrace_filter + +- traceon/traceoff + These commands turn tracing on and off when the specified + functions are hit. The parameter determines how many times the + tracing system is turned on and off. If unspecified, there is + no limit. For example, to disable tracing when a schedule bug + is hit the first 5 times, run: + + echo '__schedule_bug:traceoff:5' > set_ftrace_filter + + These commands are cumulative whether or not they are appended + to set_ftrace_filter. To remove a command, prepend it by '!' + and drop the parameter: + + echo '!__schedule_bug:traceoff' > set_ftrace_filter + + trace_pipe ---------- From e330b3bcd83199dd63a819d8d12e40f9edae6c77 Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Mon, 26 Apr 2010 14:02:05 -0400 Subject: [PATCH 09/15] tracing: Show sample std dev in function profiling When combined with function graph tracing the ftrace function profiler also prints the average run time of functions. While this gives us some good information, it doesn't tell us anything about the variance of the run times of the function. This change prints out the s^2 sample standard deviation alongside the average. This change adds one entry to the profile record structure. This increases the memory footprint of the function profiler by 1/3 on a 32-bit system, and by 1/5 on a 64-bit system when function graphing is enabled, though the memory is only allocated when the profiler is turned on. During the profiling, one extra line of code adds the squared calltime to the new record entry, so this should not adversly affect performance. Note that the square of the sample standard deviation is printed because there is no sqrt implementation for unsigned long long in the kernel. Signed-off-by: Chase Douglas LKML-Reference: <1272304925-2436-1-git-send-email-chase.douglas@canonical.com> [ fixed comment about ns^2 -> us^2 conversion ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2404b59b3097..3bcb340d6f02 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -264,6 +264,7 @@ struct ftrace_profile { unsigned long counter; #ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned long long time; + unsigned long long time_squared; #endif }; @@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " Function " - "Hit Time Avg\n" + "Hit Time Avg s^2\n" " -------- " - "--- ---- ---\n"); + "--- ---- --- ---\n"); #else seq_printf(m, " Function Hit\n" " -------- ---\n"); @@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v) static DEFINE_MUTEX(mutex); static struct trace_seq s; unsigned long long avg; + unsigned long long stddev; #endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v) avg = rec->time; do_div(avg, rec->counter); + /* Sample standard deviation (s^2) */ + if (rec->counter <= 1) + stddev = 0; + else { + stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Divide only 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide 1000 again. + */ + do_div(stddev, (rec->counter - 1) * 1000); + } + mutex_lock(&mutex); trace_seq_init(&s); trace_print_graph_duration(rec->time, &s); trace_seq_puts(&s, " "); trace_print_graph_duration(avg, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(stddev, &s); trace_print_seq(m, &s); mutex_unlock(&mutex); #endif @@ -668,8 +684,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) } rec = ftrace_find_profiled_func(stat, trace->func); - if (rec) + if (rec) { rec->time += calltime; + rec->time_squared += calltime * calltime; + } out: local_irq_restore(flags); From 37e44bc50d91df1fe7edcf6f02fe168c6d802e64 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 Apr 2010 21:04:24 -0400 Subject: [PATCH 10/15] tracing: Fix sleep time function profiling When sleep_time is off the function profiler ignores the time that a task is scheduled out. When the task is scheduled out a timestamp is taken. When the task is scheduled back in, the timestamp is compared to the current time and the saved calltimes are adjusted accordingly. But when stopping the function profiler, the sched switch hook that does this adjustment was stopped before shutting down the tracer. This allowed some tasks to not get their timestamps set when they scheduled out. When the function profiler started again, this would skew the times of the scheduler functions. This patch moves the stopping of the sched switch to after the function profiler is stopped. It also ignores zero set calltimes, which may happen on start up. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3bcb340d6f02..8c9c2934c45f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -666,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash || !ftrace_profile_enabled) goto out; + /* If the calltime was zero'd ignore it */ + if (!trace->calltime) + goto out; + calltime = trace->rettime - trace->calltime; if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { @@ -3357,11 +3361,11 @@ void unregister_ftrace_graph(void) goto out; ftrace_graph_active--; - unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); out: mutex_unlock(&ftrace_lock); From 4dbf6bc239c169b032777616806ecc648058f6b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 4 May 2010 11:24:01 -0400 Subject: [PATCH 11/15] tracing: Convert nop macros to static inlines The ftrace.h file contains several functions as macros when the functions are disabled due to config options. This patch converts most of them to static inlines. There are two exceptions: register_ftrace_function() and unregister_ftrace_function() This is because their parameter "ops" must not be evaluated since code using the function is allowed to #ifdef out the creation of the parameter. This also fixes an error caused by recent changes: kernel/trace/trace_irqsoff.c: In function 'start_irqsoff_tracer': kernel/trace/trace_irqsoff.c:571: error: expected expression before 'do' Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8415a522f430..e0ae83bbd9cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -82,9 +82,13 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); #else /* !CONFIG_FUNCTION_TRACER */ -# define register_ftrace_function(ops) do { } while (0) -# define unregister_ftrace_function(ops) do { } while (0) -# define clear_ftrace_function(ops) do { } while (0) +/* + * (un)register_ftrace_function must be a macro since the ops parameter + * must not be evaluated. + */ +#define register_ftrace_function(ops) ({ 0; }) +#define unregister_ftrace_function(ops) ({ 0; }) +static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } @@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else -# define skip_trace(ip) ({ 0; }) -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) -# define ftrace_disable_daemon() do { } while (0) -# define ftrace_enable_daemon() do { } while (0) +static inline int skip_trace(unsigned long ip) { return 0; } +static inline int ftrace_force_update(void) { return 0; } +static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ +} +static inline void ftrace_disable_daemon(void) { } +static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { @@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled) extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else -# define time_hardirqs_on(a0, a1) do { } while (0) -# define time_hardirqs_off(a0, a1) do { } while (0) + static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { } + static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_PREEMPT_TRACER extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else -# define trace_preempt_on(a0, a1) do { } while (0) -# define trace_preempt_off(a0, a1) do { } while (0) + static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { } + static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD From 956097912c40a03bf22603a3be73503fd9ea9e44 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 2 May 2010 08:03:54 +0200 Subject: [PATCH 12/15] ring-buffer: Wrap open-coded WARN_ONCE Wrap open-coded WARN_ONCE functionality into the equivalent macro. Signed-off-by: Borislav Petkov LKML-Reference: <20100502060354.GA5281@liondog.tnic> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2a090448ef6b..7f6059c5aa94 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2000,17 +2000,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, u64 *delta) { struct ring_buffer_event *event; - static int once; int ret; - if (unlikely(*delta > (1ULL << 59) && !once++)) { - printk(KERN_WARNING "Delta way too big! %llu" - " ts=%llu write stamp = %llu\n", - (unsigned long long)*delta, - (unsigned long long)*ts, - (unsigned long long)cpu_buffer->write_stamp); - WARN_ON(1); - } + WARN_ONCE(*delta > (1ULL << 59), + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + (unsigned long long)*delta, + (unsigned long long)*ts, + (unsigned long long)cpu_buffer->write_stamp); /* * The delta is too big, we to add a From 03d646e62b06e9364e2dbb939d67934c6c9826cd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Dec 2009 14:27:24 +0800 Subject: [PATCH 13/15] tracing: Make the documentation clear on trace_event boot option Make it clear that event-list is a comma separated list of events. Reported-by: KOSAKI Motohiro Signed-off-by: Li Zefan LKML-Reference: <4B2F154C.2060503@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 02ac6ed38b2d..778ddf38b82c 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -90,7 +90,8 @@ In order to facilitate early boot debugging, use boot option: trace_event=[event-list] -The format of this boot option is the same as described in section 2.1. +event-list is a comma separated list of events. See section 2.1 for event +format. 3. Defining an event-enabled tracepoint ======================================= From 2e26ca7150a4f2ab3e69471dfc65f131e7dd7a05 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 May 2010 10:52:31 -0400 Subject: [PATCH 14/15] tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header When more than one header is included under CREATE_TRACE_POINTS the DECLARE_TRACE() macro is not defined back to its original meaning and the second include will fail to initialize the TRACE_EVENT() and DECLARE_TRACE() correctly. To fix this the tracepoint.h file moves the define of DECLARE_TRACE() out of the #ifdef _LINUX_TRACEPOINT_H protection (just like the define of the TRACE_EVENT()). This way the define_trace.h will undef the DECLARE_TRACE() at the end and allow new headers to start from scratch. This patch also requires fixing the include/events/napi.h It currently uses DECLARE_TRACE() and should be converted to a TRACE_EVENT() format. But I'll leave that change to the authors of that file. But since the napi.h file depends on using the CREATE_TRACE_POINTS and does not define its own DEFINE_TRACE() it must use the define_trace.h method instead. Cc: Neil Horman Cc: David S. Miller Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 114 ++++++++++++++++++----------------- include/trace/define_trace.h | 5 ++ include/trace/events/napi.h | 10 ++- 3 files changed, 72 insertions(+), 57 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 78b4bd3be496..1d85f9a6a199 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -33,6 +33,65 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +/* + * Connect a probe to a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_register(const char *name, void *probe); + +/* + * Disconnect a probe from a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_unregister(const char *name, void *probe); + +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + +struct tracepoint_iter { + struct module *module; + struct tracepoint *tracepoint; +}; + +extern void tracepoint_iter_start(struct tracepoint_iter *iter); +extern void tracepoint_iter_next(struct tracepoint_iter *iter); +extern void tracepoint_iter_stop(struct tracepoint_iter *iter); +extern void tracepoint_iter_reset(struct tracepoint_iter *iter); +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end); + +/* + * tracepoint_synchronize_unregister must be called between the last tracepoint + * probe unregistration and the end of module exit to make sure there is no + * caller executing a probe when it is freed. + */ +static inline void tracepoint_synchronize_unregister(void) +{ + synchronize_sched(); +} + +#define PARAMS(args...) args + +#ifdef CONFIG_TRACEPOINTS +extern void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end); +#else +static inline void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ } +#endif /* CONFIG_TRACEPOINTS */ + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include + * file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ + #ifndef DECLARE_TRACE #define TP_PROTO(args...) args @@ -96,9 +155,6 @@ struct tracepoint { #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -extern void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end); - #else /* !CONFIG_TRACEPOINTS */ #define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ @@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -static inline void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end) -{ } #endif /* CONFIG_TRACEPOINTS */ #endif /* DECLARE_TRACE */ -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_unregister(const char *name, void *probe); - -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); -extern void tracepoint_probe_update_all(void); - -struct tracepoint_iter { - struct module *module; - struct tracepoint *tracepoint; -}; - -extern void tracepoint_iter_start(struct tracepoint_iter *iter); -extern void tracepoint_iter_next(struct tracepoint_iter *iter); -extern void tracepoint_iter_stop(struct tracepoint_iter *iter); -extern void tracepoint_iter_reset(struct tracepoint_iter *iter); -extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end); - -/* - * tracepoint_synchronize_unregister must be called between the last tracepoint - * probe unregistration and the end of module exit to make sure there is no - * caller executing a probe when it is freed. - */ -static inline void tracepoint_synchronize_unregister(void) -{ - synchronize_sched(); -} - -#define PARAMS(args...) args - -#endif /* _LINUX_TRACEPOINT_H */ - -/* - * Note: we keep the TRACE_EVENT outside the include file ifdef protection. - * This is due to the way trace events work. If a file includes two - * trace event headers under one "CREATE_TRACE_POINTS" the first include - * will override the TRACE_EVENT and break the second include. - */ - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5acfb1eb4df9..1dfab5401511 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -65,6 +65,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* Make all open coded DECLARE_TRACE nops */ +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) + #ifdef CONFIG_EVENT_TRACING #include #endif @@ -75,6 +79,7 @@ #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ +#undef DECLARE_TRACE /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index a8989c4547e7..188deca2f3c7 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -1,4 +1,7 @@ -#ifndef _TRACE_NAPI_H_ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM napi + +#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NAPI_H_ #include @@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), TP_ARGS(napi)); -#endif +#endif /* _TRACE_NAPI_H_ */ + +/* This part must be outside protection */ +#include From 668eb65f092902eb7dd526af73d4a7f025a94612 Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Sun, 24 Jan 2010 11:03:50 -0500 Subject: [PATCH 15/15] tracing: Fix "integer as NULL pointer" warning. kernel/trace/trace_output.c:256:24: warning: Using plain integer as NULL pointer Signed-off-by: Thiago Farina LKML-Reference: <1264349038-1766-3-git-send-email-tfransosi@gmail.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8e46b3323cdc..2404c129a8c9 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -253,7 +253,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) void *ret; if (s->full) - return 0; + return NULL; if (len > ((PAGE_SIZE - 1) - s->len)) { s->full = 1;