diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0c6c56076d19..567b7a8eb878 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -786,8 +786,12 @@ and is between 256 and 4096 characters. It is defined in the file as early as possible in order to facilitate early boot debugging. - ftrace_dump_on_oops + ftrace_dump_on_oops[=orig_cpu] [FTRACE] will dump the trace buffers on oops. + If no parameter is passed, ftrace will dump + buffers of all CPUs, but if you pass orig_cpu, it will + dump only the buffer of the CPU that triggered the + oops. ftrace_filter=[function-list] [FTRACE] Limit the functions traced by the function diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 02ac6ed38b2d..778ddf38b82c 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -90,7 +90,8 @@ In order to facilitate early boot debugging, use boot option: trace_event=[event-list] -The format of this boot option is the same as described in section 2.1. +event-list is a comma separated list of events. See section 2.1 for event +format. 3. Defining an event-enabled tracepoint ======================================= diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 03485bfbd797..557c1edeccaf 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -155,6 +155,9 @@ of ftrace. Here is a list of some of the key files: to be traced. Echoing names of functions into this file will limit the trace to only those functions. + This interface also allows for commands to be used. See the + "Filter commands" section for more details. + set_ftrace_notrace: This has an effect opposite to that of @@ -1337,12 +1340,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one can either use the sysctl function or set it via the proc system interface. - sysctl kernel.ftrace_dump_on_oops=1 + sysctl kernel.ftrace_dump_on_oops=n or - echo 1 > /proc/sys/kernel/ftrace_dump_on_oops + echo n > /proc/sys/kernel/ftrace_dump_on_oops +If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will +only dump the buffer of the CPU that triggered the oops. Here's an example of such a dump after a null pointer dereference in a kernel module: @@ -1822,6 +1827,47 @@ this special filter via: echo > set_graph_function +Filter commands +--------------- + +A few commands are supported by the set_ftrace_filter interface. +Trace commands have the following format: + +:: + +The following commands are supported: + +- mod + This command enables function filtering per module. The + parameter defines the module. For example, if only the write* + functions in the ext3 module are desired, run: + + echo 'write*:mod:ext3' > set_ftrace_filter + + This command interacts with the filter in the same way as + filtering based on function names. Thus, adding more functions + in a different module is accomplished by appending (>>) to the + filter file. Remove specific module functions by prepending + '!': + + echo '!writeback*:mod:ext3' >> set_ftrace_filter + +- traceon/traceoff + These commands turn tracing on and off when the specified + functions are hit. The parameter determines how many times the + tracing system is turned on and off. If unspecified, there is + no limit. For example, to disable tracing when a schedule bug + is hit the first 5 times, run: + + echo '__schedule_bug:traceoff:5' > set_ftrace_filter + + These commands are cumulative whether or not they are appended + to set_ftrace_filter. To remove a command, prepend it by '!' + and drop the parameter: + + echo '!__schedule_bug:traceoff' > set_ftrace_filter + + trace_pipe ---------- diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 59de2525d303..d4e8b213a462 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -289,7 +289,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { static void sysrq_ftrace_dump(int key, struct tty_struct *tty) { - ftrace_dump(); + ftrace_dump(DUMP_ALL); } static struct sysrq_key_op sysrq_ftrace_dump_op = { .handler = sysrq_ftrace_dump, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index cc12b3c556b3..41e46330d9be 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -82,9 +82,13 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); #else /* !CONFIG_FUNCTION_TRACER */ -# define register_ftrace_function(ops) do { } while (0) -# define unregister_ftrace_function(ops) do { } while (0) -# define clear_ftrace_function(ops) do { } while (0) +/* + * (un)register_ftrace_function must be a macro since the ops parameter + * must not be evaluated. + */ +#define register_ftrace_function(ops) ({ 0; }) +#define unregister_ftrace_function(ops) ({ 0; }) +static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } @@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else -# define skip_trace(ip) ({ 0; }) -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) -# define ftrace_disable_daemon() do { } while (0) -# define ftrace_enable_daemon() do { } while (0) +static inline int skip_trace(unsigned long ip) { return 0; } +static inline int ftrace_force_update(void) { return 0; } +static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ +} +static inline void ftrace_disable_daemon(void) { } +static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { @@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled) extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else -# define time_hardirqs_on(a0, a1) do { } while (0) -# define time_hardirqs_off(a0, a1) do { } while (0) + static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { } + static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_PREEMPT_TRACER extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else -# define trace_preempt_on(a0, a1) do { } while (0) -# define trace_preempt_off(a0, a1) do { } while (0) + static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { } + static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD @@ -352,6 +358,10 @@ struct ftrace_graph_ret { int depth; }; +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* for init task */ @@ -400,10 +410,6 @@ extern char __irqentry_text_end[]; #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ - extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); @@ -441,6 +447,13 @@ static inline void unpause_graph_tracing(void) static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } +static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) +{ + return -1; +} +static inline void unregister_ftrace_graph(void) { } + static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; @@ -492,7 +505,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } -extern int ftrace_dump_on_oops; +enum ftrace_dump_mode; + +extern enum ftrace_dump_mode ftrace_dump_on_oops; #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9365227dbaf6..9fb1c1299032 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -490,6 +490,13 @@ static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } static inline int tracing_is_on(void) { return 0; } #endif + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, +}; + #ifdef CONFIG_TRACING extern void tracing_start(void); extern void tracing_stop(void); @@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); -extern void ftrace_dump(void); +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } @@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap) { return 0; } -static inline void ftrace_dump(void) { } +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c8297761e414..25b4f686d918 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_prepare_sync(void); +void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 78b4bd3be496..1d85f9a6a199 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -33,6 +33,65 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +/* + * Connect a probe to a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_register(const char *name, void *probe); + +/* + * Disconnect a probe from a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_unregister(const char *name, void *probe); + +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + +struct tracepoint_iter { + struct module *module; + struct tracepoint *tracepoint; +}; + +extern void tracepoint_iter_start(struct tracepoint_iter *iter); +extern void tracepoint_iter_next(struct tracepoint_iter *iter); +extern void tracepoint_iter_stop(struct tracepoint_iter *iter); +extern void tracepoint_iter_reset(struct tracepoint_iter *iter); +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end); + +/* + * tracepoint_synchronize_unregister must be called between the last tracepoint + * probe unregistration and the end of module exit to make sure there is no + * caller executing a probe when it is freed. + */ +static inline void tracepoint_synchronize_unregister(void) +{ + synchronize_sched(); +} + +#define PARAMS(args...) args + +#ifdef CONFIG_TRACEPOINTS +extern void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end); +#else +static inline void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ } +#endif /* CONFIG_TRACEPOINTS */ + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include + * file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ + #ifndef DECLARE_TRACE #define TP_PROTO(args...) args @@ -96,9 +155,6 @@ struct tracepoint { #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -extern void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end); - #else /* !CONFIG_TRACEPOINTS */ #define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ @@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -static inline void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end) -{ } #endif /* CONFIG_TRACEPOINTS */ #endif /* DECLARE_TRACE */ -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_unregister(const char *name, void *probe); - -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); -extern void tracepoint_probe_update_all(void); - -struct tracepoint_iter { - struct module *module; - struct tracepoint *tracepoint; -}; - -extern void tracepoint_iter_start(struct tracepoint_iter *iter); -extern void tracepoint_iter_next(struct tracepoint_iter *iter); -extern void tracepoint_iter_stop(struct tracepoint_iter *iter); -extern void tracepoint_iter_reset(struct tracepoint_iter *iter); -extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end); - -/* - * tracepoint_synchronize_unregister must be called between the last tracepoint - * probe unregistration and the end of module exit to make sure there is no - * caller executing a probe when it is freed. - */ -static inline void tracepoint_synchronize_unregister(void) -{ - synchronize_sched(); -} - -#define PARAMS(args...) args - -#endif /* _LINUX_TRACEPOINT_H */ - -/* - * Note: we keep the TRACE_EVENT outside the include file ifdef protection. - * This is due to the way trace events work. If a file includes two - * trace event headers under one "CREATE_TRACE_POINTS" the first include - * will override the TRACE_EVENT and break the second include. - */ - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5acfb1eb4df9..1dfab5401511 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -65,6 +65,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* Make all open coded DECLARE_TRACE nops */ +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) + #ifdef CONFIG_EVENT_TRACING #include #endif @@ -75,6 +79,7 @@ #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ +#undef DECLARE_TRACE /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index a8989c4547e7..188deca2f3c7 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -1,4 +1,7 @@ -#ifndef _TRACE_NAPI_H_ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM napi + +#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NAPI_H_ #include @@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), TP_ARGS(napi)); -#endif +#endif /* _TRACE_NAPI_H_ */ + +/* This part must be outside protection */ +#include diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aa3a92b511e2..32837e19e3bd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -264,6 +264,7 @@ struct ftrace_profile { unsigned long counter; #ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned long long time; + unsigned long long time_squared; #endif }; @@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " Function " - "Hit Time Avg\n" + "Hit Time Avg s^2\n" " -------- " - "--- ---- ---\n"); + "--- ---- --- ---\n"); #else seq_printf(m, " Function Hit\n" " -------- ---\n"); @@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v) static DEFINE_MUTEX(mutex); static struct trace_seq s; unsigned long long avg; + unsigned long long stddev; #endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v) avg = rec->time; do_div(avg, rec->counter); + /* Sample standard deviation (s^2) */ + if (rec->counter <= 1) + stddev = 0; + else { + stddev = rec->time_squared - rec->counter * avg * avg; + /* + * Divide only 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide 1000 again. + */ + do_div(stddev, (rec->counter - 1) * 1000); + } + mutex_lock(&mutex); trace_seq_init(&s); trace_print_graph_duration(rec->time, &s); trace_seq_puts(&s, " "); trace_print_graph_duration(avg, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(stddev, &s); trace_print_seq(m, &s); mutex_unlock(&mutex); #endif @@ -650,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash || !ftrace_profile_enabled) goto out; + /* If the calltime was zero'd ignore it */ + if (!trace->calltime) + goto out; + calltime = trace->rettime - trace->calltime; if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { @@ -668,8 +688,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) } rec = ftrace_find_profiled_func(stat, trace->func); - if (rec) + if (rec) { rec->time += calltime; + rec->time_squared += calltime * calltime; + } out: local_irq_restore(flags); @@ -3338,11 +3360,11 @@ void unregister_ftrace_graph(void) goto out; ftrace_graph_active--; - unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); out: mutex_unlock(&ftrace_lock); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5885cdfc41f3..7f6059c5aa94 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2000,17 +2000,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, u64 *delta) { struct ring_buffer_event *event; - static int once; int ret; - if (unlikely(*delta > (1ULL << 59) && !once++)) { - printk(KERN_WARNING "Delta way too big! %llu" - " ts=%llu write stamp = %llu\n", - (unsigned long long)*delta, - (unsigned long long)*ts, - (unsigned long long)cpu_buffer->write_stamp); - WARN_ON(1); - } + WARN_ONCE(*delta > (1ULL << 59), + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + (unsigned long long)*delta, + (unsigned long long)*ts, + (unsigned long long)cpu_buffer->write_stamp); /* * The delta is too big, we to add a @@ -3332,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, EXPORT_SYMBOL_GPL(ring_buffer_consume); /** - * ring_buffer_read_start - start a non consuming read of the buffer + * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer * @buffer: The ring buffer to read from * @cpu: The cpu buffer to iterate over * - * This starts up an iteration through the buffer. It also disables - * the recording to the buffer until the reading is finished. - * This prevents the reading from being corrupted. This is not - * a consuming read, so a producer is not expected. + * This performs the initial preparations necessary to iterate + * through the buffer. Memory is allocated, buffer recording + * is disabled, and the iterator pointer is returned to the caller. * - * Must be paired with ring_buffer_finish. + * Disabling buffer recordng prevents the reading from being + * corrupted. This is not a consuming read, so a producer is not + * expected. + * + * After a sequence of ring_buffer_read_prepare calls, the user is + * expected to make at least one call to ring_buffer_prepare_sync. + * Afterwards, ring_buffer_read_start is invoked to get things going + * for real. + * + * This overall must be paired with ring_buffer_finish. */ struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu) +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_iter *iter; - unsigned long flags; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; @@ -3362,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) iter->cpu_buffer = cpu_buffer; atomic_inc(&cpu_buffer->record_disabled); + + return iter; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); + +/** + * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls + * + * All previously invoked ring_buffer_read_prepare calls to prepare + * iterators will be synchronized. Afterwards, read_buffer_read_start + * calls on those iterators are allowed. + */ +void +ring_buffer_read_prepare_sync(void) +{ synchronize_sched(); +} +EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); + +/** + * ring_buffer_read_start - start a non consuming read of the buffer + * @iter: The iterator returned by ring_buffer_read_prepare + * + * This finalizes the startup of an iteration through the buffer. + * The iterator comes from a call to ring_buffer_read_prepare and + * an intervening ring_buffer_read_prepare_sync must have been + * performed. + * + * Must be paired with ring_buffer_finish. + */ +void +ring_buffer_read_start(struct ring_buffer_iter *iter) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; + + if (!iter) + return; + + cpu_buffer = iter->cpu_buffer; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - - return iter; } EXPORT_SYMBOL_GPL(ring_buffer_read_start); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index dc56556b55a2..302f8a614635 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -113,7 +113,8 @@ static enum event_status read_page(int cpu) ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); if (ret >= 0) { rpage = bpage; - commit = local_read(&rpage->commit); + /* The commit may have missed event flags set, clear them */ + commit = local_read(&rpage->commit) & 0xfffff; for (i = 0; i < commit && !kill_test; i += inc) { if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 60f3b6289731..756d7283318b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -117,9 +117,12 @@ static cpumask_var_t __read_mostly tracing_buffer_mask; * * It is default off, but you can enable it with either specifying * "ftrace_dump_on_oops" in the kernel command line, or setting - * /proc/sys/kernel/ftrace_dump_on_oops to true. + * /proc/sys/kernel/ftrace_dump_on_oops + * Set 1 if you want to dump buffers of all CPUs + * Set 2 if you want to dump the buffer of the CPU that triggered oops */ -int ftrace_dump_on_oops; + +enum ftrace_dump_mode ftrace_dump_on_oops; static int tracing_set_tracer(const char *buf); @@ -139,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { - ftrace_dump_on_oops = 1; - return 1; + if (*str++ != '=' || !*str) { + ftrace_dump_on_oops = DUMP_ALL; + return 1; + } + + if (!strcmp("orig_cpu", str)) { + ftrace_dump_on_oops = DUMP_ORIG; + return 1; + } + + return 0; } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); @@ -1571,7 +1583,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; - unsigned long lost_events, next_lost = 0; + unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; @@ -1796,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m) } -static void +void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -2005,7 +2017,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; } -static int trace_empty(struct trace_iterator *iter) +int trace_empty(struct trace_iterator *iter) { int cpu; @@ -2072,6 +2084,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_trace_fmt(iter); } +void trace_default_header(struct seq_file *m) +{ + struct trace_iterator *iter = m->private; + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2084,17 +2113,9 @@ static int s_show(struct seq_file *m, void *v) } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); - else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { - /* print nothing if the buffers are empty */ - if (trace_empty(iter)) - return 0; - print_trace_header(m, iter); - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_lat_help_header(m); - } else { - if (!(trace_flags & TRACE_ITER_VERBOSE)) - print_func_help_header(m); - } + else + trace_default_header(m); + } else if (iter->leftover) { /* * If we filled the seq_file buffer earlier, we @@ -2180,15 +2201,20 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { - iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + } + ring_buffer_read_prepare_sync(); + for_each_tracing_cpu(cpu) { + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_prepare(iter->tr->buffer, cpu); + ring_buffer_read_prepare_sync(); + ring_buffer_read_start(iter->buffer_iter[cpu]); tracing_iter_reset(iter, cpu); } @@ -4338,7 +4364,7 @@ static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); return NOTIFY_OK; } @@ -4355,7 +4381,7 @@ static int trace_die_handler(struct notifier_block *self, switch (val) { case DIE_OOPS: if (ftrace_dump_on_oops) - ftrace_dump(); + ftrace_dump(ftrace_dump_on_oops); break; default: break; @@ -4396,7 +4422,8 @@ trace_printk_seq(struct trace_seq *s) trace_seq_init(s); } -static void __ftrace_dump(bool disable_tracing) +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) { static arch_spinlock_t ftrace_dump_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -4429,12 +4456,25 @@ static void __ftrace_dump(bool disable_tracing) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - printk(KERN_TRACE "Dumping ftrace buffer:\n"); - /* Simulate the iterator */ iter.tr = &global_trace; iter.trace = current_trace; - iter.cpu_file = TRACE_PIPE_ALL_CPU; + + switch (oops_dump_mode) { + case DUMP_ALL: + iter.cpu_file = TRACE_PIPE_ALL_CPU; + break; + case DUMP_ORIG: + iter.cpu_file = raw_smp_processor_id(); + break; + case DUMP_NONE: + goto out_enable; + default: + printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); + iter.cpu_file = TRACE_PIPE_ALL_CPU; + } + + printk(KERN_TRACE "Dumping ftrace buffer:\n"); /* * We need to stop all tracing on all CPUS to read the @@ -4473,6 +4513,7 @@ static void __ftrace_dump(bool disable_tracing) else printk(KERN_TRACE "---------------------------------\n"); + out_enable: /* Re-enable tracing if requested */ if (!disable_tracing) { trace_flags |= old_userobj; @@ -4489,9 +4530,9 @@ static void __ftrace_dump(bool disable_tracing) } /* By default: disable tracing after the dump */ -void ftrace_dump(void) +void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { - __ftrace_dump(true); + __ftrace_dump(true, oops_dump_mode); } __init static int tracer_alloc_buffers(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3ebdb6bd2362..d1ce0bec1b3f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -364,6 +364,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void trace_default_header(struct seq_file *m); +void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); @@ -475,9 +478,29 @@ extern int trace_clock_id; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -extern enum print_line_t print_graph_function(struct trace_iterator *iter); + +/* Flag options */ +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +#define TRACE_GRAPH_PRINT_CPU 0x2 +#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 +#define TRACE_GRAPH_PRINT_PROC 0x8 +#define TRACE_GRAPH_PRINT_DURATION 0x10 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 + +extern enum print_line_t +print_graph_function_flags(struct trace_iterator *iter, u32 flags); +extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern enum print_line_t trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); +extern void graph_trace_open(struct trace_iterator *iter); +extern void graph_trace_close(struct trace_iterator *iter); +extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, int pc); +extern void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, + unsigned long flags, int pc); + #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -508,7 +531,7 @@ static inline int ftrace_graph_addr(unsigned long addr) #endif /* CONFIG_DYNAMIC_FTRACE */ #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 669b9c31861d..dd11c830eb84 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -40,7 +40,7 @@ struct fgraph_data { #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 -#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 +#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -179,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } -static int __trace_graph_entry(struct trace_array *tr, +int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -246,7 +246,7 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) return trace_graph_entry(trace); } -static void __trace_graph_return(struct trace_array *tr, +void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -527,17 +527,18 @@ get_return_for_leaf(struct trace_iterator *iter, /* Signal a overhead of time execution to the output */ static int -print_graph_overhead(unsigned long long duration, struct trace_seq *s) +print_graph_overhead(unsigned long long duration, struct trace_seq *s, + u32 flags) { /* If duration disappear, we don't need anything */ - if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) + if (!(flags & TRACE_GRAPH_PRINT_DURATION)) return 1; /* Non nested entry or return */ if (duration == -1) return trace_seq_printf(s, " "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { /* Duration exceeded 100 msecs */ if (duration > 100000ULL) return trace_seq_printf(s, "! "); @@ -563,7 +564,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s) static enum print_line_t print_graph_irq(struct trace_iterator *iter, unsigned long addr, - enum trace_type type, int cpu, pid_t pid) + enum trace_type type, int cpu, pid_t pid, u32 flags) { int ret; struct trace_seq *s = &iter->seq; @@ -573,21 +574,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_UNHANDLED; /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -597,7 +598,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -610,7 +611,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_PARTIAL_LINE; /* Don't close the duration column if haven't one */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) trace_seq_printf(s, " |"); ret = trace_seq_printf(s, "\n"); @@ -680,7 +681,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) static enum print_line_t print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) + struct ftrace_graph_ret_entry *ret_entry, + struct trace_seq *s, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ret *graph_ret; @@ -712,12 +714,12 @@ print_graph_entry_leaf(struct trace_iterator *iter, } /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -740,7 +742,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, static enum print_line_t print_graph_entry_nested(struct trace_iterator *iter, struct ftrace_graph_ent_entry *entry, - struct trace_seq *s, int cpu) + struct trace_seq *s, int cpu, u32 flags) { struct ftrace_graph_ent *call = &entry->graph_ent; struct fgraph_data *data = iter->private; @@ -760,12 +762,12 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -791,7 +793,7 @@ print_graph_entry_nested(struct trace_iterator *iter, static enum print_line_t print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, - int type, unsigned long addr) + int type, unsigned long addr, u32 flags) { struct fgraph_data *data = iter->private; struct trace_entry *ent = iter->ent; @@ -804,27 +806,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, if (type) { /* Interrupt */ - ret = print_graph_irq(iter, addr, type, cpu, ent->pid); + ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Absolute time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* Cpu */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + if (flags & TRACE_GRAPH_PRINT_CPU) { ret = print_graph_cpu(s, cpu); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; } /* Proc */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + if (flags & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -846,7 +848,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, static enum print_line_t print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, - struct trace_iterator *iter) + struct trace_iterator *iter, u32 flags) { struct fgraph_data *data = iter->private; struct ftrace_graph_ent *call = &field->graph_ent; @@ -854,14 +856,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t ret; int cpu = iter->cpu; - if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) + if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) return TRACE_TYPE_PARTIAL_LINE; leaf_ret = get_return_for_leaf(iter, field); if (leaf_ret) - ret = print_graph_entry_leaf(iter, field, leaf_ret, s); + ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags); else - ret = print_graph_entry_nested(iter, field, s, cpu); + ret = print_graph_entry_nested(iter, field, s, cpu, flags); if (data) { /* @@ -880,7 +882,8 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, - struct trace_entry *ent, struct trace_iterator *iter) + struct trace_entry *ent, struct trace_iterator *iter, + u32 flags) { unsigned long long duration = trace->rettime - trace->calltime; struct fgraph_data *data = iter->private; @@ -910,16 +913,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } } - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* Overhead */ - ret = print_graph_overhead(duration, s); + ret = print_graph_overhead(duration, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Duration */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = print_graph_duration(duration, s); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -949,14 +952,15 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } /* Overrun */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + if (flags & TRACE_GRAPH_PRINT_OVERRUN) { ret = trace_seq_printf(s, " (Overruns: %lu)\n", trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } - ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); + ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, + cpu, pid, flags); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; @@ -964,8 +968,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } static enum print_line_t -print_graph_comment(struct trace_seq *s, struct trace_entry *ent, - struct trace_iterator *iter) +print_graph_comment(struct trace_seq *s, struct trace_entry *ent, + struct trace_iterator *iter, u32 flags) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct fgraph_data *data = iter->private; @@ -977,16 +981,16 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (data) depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; - if (print_graph_prologue(iter, s, 0, 0)) + if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; /* No overhead */ - ret = print_graph_overhead(-1, s); + ret = print_graph_overhead(-1, s, flags); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* No time */ - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + if (flags & TRACE_GRAPH_PRINT_DURATION) { ret = trace_seq_printf(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1041,7 +1045,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, enum print_line_t -print_graph_function(struct trace_iterator *iter) +print_graph_function_flags(struct trace_iterator *iter, u32 flags) { struct ftrace_graph_ent_entry *field; struct fgraph_data *data = iter->private; @@ -1062,7 +1066,7 @@ print_graph_function(struct trace_iterator *iter) if (data && data->failed) { field = &data->ent; iter->cpu = data->cpu; - ret = print_graph_entry(field, s, iter); + ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; ret = TRACE_TYPE_NO_CONSUME; @@ -1082,32 +1086,49 @@ print_graph_function(struct trace_iterator *iter) struct ftrace_graph_ent_entry saved; trace_assign_type(field, entry); saved = *field; - return print_graph_entry(&saved, s, iter); + return print_graph_entry(&saved, s, iter, flags); } case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); - return print_graph_return(&field->ret, s, entry, iter); + return print_graph_return(&field->ret, s, entry, iter, flags); } + case TRACE_STACK: + case TRACE_FN: + /* dont trace stack and functions as comments */ + return TRACE_TYPE_UNHANDLED; + default: - return print_graph_comment(s, entry, iter); + return print_graph_comment(s, entry, iter, flags); } return TRACE_TYPE_HANDLED; } -static void print_lat_header(struct seq_file *s) +static enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + return print_graph_function_flags(iter, tracer_flags.val); +} + +static enum print_line_t +print_graph_function_event(struct trace_iterator *iter, int flags) +{ + return print_graph_function(iter); +} + +static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ " " /* 4 spaces */ " "; /* 17 spaces */ int size = 0; - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) size += 16; - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) size += 4; - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) size += 17; seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); @@ -1118,43 +1139,48 @@ static void print_lat_header(struct seq_file *s) seq_printf(s, "#%.*s|||| / \n", size, spaces); } -static void print_graph_headers(struct seq_file *s) +void print_graph_headers_flags(struct seq_file *s, u32 flags) { int lat = trace_flags & TRACE_ITER_LATENCY_FMT; if (lat) - print_lat_header(s); + print_lat_header(s, flags); /* 1st line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " TIME "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " CPU"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " TASK/PID "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); /* 2nd line */ seq_printf(s, "#"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) + if (flags & TRACE_GRAPH_PRINT_CPU) seq_printf(s, " | "); - if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) + if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " | | "); if (lat) seq_printf(s, "|||||"); - if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) + if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); } -static void graph_trace_open(struct trace_iterator *iter) +void print_graph_headers(struct seq_file *s) +{ + print_graph_headers_flags(s, tracer_flags.val); +} + +void graph_trace_open(struct trace_iterator *iter) { /* pid and depth on the last trace processed */ struct fgraph_data *data; @@ -1189,7 +1215,7 @@ static void graph_trace_open(struct trace_iterator *iter) pr_warning("function graph tracer: not enough memory\n"); } -static void graph_trace_close(struct trace_iterator *iter) +void graph_trace_close(struct trace_iterator *iter) { struct fgraph_data *data = iter->private; @@ -1199,6 +1225,16 @@ static void graph_trace_close(struct trace_iterator *iter) } } +static struct trace_event graph_trace_entry_event = { + .type = TRACE_GRAPH_ENT, + .trace = print_graph_function_event, +}; + +static struct trace_event graph_trace_ret_event = { + .type = TRACE_GRAPH_RET, + .trace = print_graph_function_event, +}; + static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, @@ -1220,6 +1256,16 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); + if (!register_ftrace_event(&graph_trace_entry_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + + if (!register_ftrace_event(&graph_trace_ret_event)) { + pr_warning("Warning: could not register graph trace events\n"); + return 1; + } + return register_tracer(&graph_trace); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2974bc7538c7..6fd486e0cef4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -34,6 +34,9 @@ static int trace_type __read_mostly; static int save_lat_flag; +static void stop_irqsoff_tracer(struct trace_array *tr, int graph); +static int start_irqsoff_tracer(struct trace_array *tr, int graph); + #ifdef CONFIG_PREEMPT_TRACER static inline int preempt_trace(void) @@ -55,6 +58,23 @@ irq_trace(void) # define irq_trace() (0) #endif +#define TRACE_DISPLAY_GRAPH 1 + +static struct tracer_opt trace_opts[] = { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* display latency trace as call graph */ + { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +#endif + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, + .opts = trace_opts, +}; + +#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly = }; #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + int cpu; + + if (!(bit & TRACE_DISPLAY_GRAPH)) + return -EINVAL; + + if (!(is_graph() ^ set)) + return 0; + + stop_irqsoff_tracer(irqsoff_trace, !set); + + for_each_possible_cpu(cpu) + per_cpu(tracing_cpu, cpu) = 0; + + tracing_max_latency = 0; + tracing_reset_online_cpus(irqsoff_trace); + + return start_irqsoff_tracer(irqsoff_trace, set); +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int ret; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return 0; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return 0; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else + ret = 0; + + atomic_dec(&data->disabled); + return ret; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) + return; + + local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return; + + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, trace, flags, pc); + } + + atomic_dec(&data->disabled); +} + +static void irqsoff_trace_open(struct trace_iterator *iter) +{ + if (is_graph()) + graph_trace_open(iter); + +} + +static void irqsoff_trace_close(struct trace_iterator *iter) +{ + if (iter->private) + graph_trace_close(iter); +} + +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ + TRACE_GRAPH_PRINT_PROC) + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) + flags |= TRACE_GRAPH_PRINT_DURATION; + else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + /* + * In graph mode call the graph tracer output function, + * otherwise go with the TRACE_FN event handler + */ + if (is_graph()) + return print_graph_function_flags(iter, flags); + + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_print_header(struct seq_file *s) +{ + if (is_graph()) { + struct trace_iterator *iter = s->private; + u32 flags = GRAPH_TRACER_FLAGS; + + if (trace_flags & TRACE_ITER_LATENCY_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return; + + print_trace_header(s, iter); + flags |= TRACE_GRAPH_PRINT_DURATION; + } else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + print_graph_headers_flags(s, flags); + } else + trace_default_header(s); +} + +static void +trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long flags, int pc) +{ + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { + .func = ip, + .depth = 0, + }; + struct ftrace_graph_ret ret = { + .func = ip, + .depth = 0, + .calltime = time, + .rettime = time, + }; + + __trace_graph_entry(tr, &ent, flags, pc); + __trace_graph_return(tr, &ret, flags, pc); +} + +static void +__trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, + unsigned long flags, int pc) +{ + if (!is_graph()) + trace_function(tr, ip, parent_ip, flags, pc); + else { + trace_graph_function(tr, parent_ip, flags, pc); + trace_graph_function(tr, ip, flags, pc); + } +} + +#else +#define __trace_function trace_function + +static int irqsoff_set_flag(u32 old_flags, u32 bit, int set) +{ + return -EINVAL; +} + +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +{ + return -1; +} + +static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} + +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } +static void irqsoff_print_header(struct seq_file *s) { } +static void irqsoff_trace_open(struct trace_iterator *iter) { } +static void irqsoff_trace_close(struct trace_iterator *iter) { } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* * Should this new latency be reported/recorded? */ @@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); /* Skip 5 functions to get to the irq/preempt enable function */ __trace_stack(tr, flags, 5, pc); @@ -172,7 +388,7 @@ check_critical_timing(struct trace_array *tr, out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, preempt_count()); + __trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static void start_irqsoff_tracer(struct trace_array *tr) +static int start_irqsoff_tracer(struct trace_array *tr, int graph) { - register_ftrace_function(&trace_ops); - if (tracing_is_enabled()) + int ret = 0; + + if (!graph) + ret = register_ftrace_function(&trace_ops); + else + ret = register_ftrace_graph(&irqsoff_graph_return, + &irqsoff_graph_entry); + + if (!ret && tracing_is_enabled()) tracer_enabled = 1; else tracer_enabled = 0; + + return ret; } -static void stop_irqsoff_tracer(struct trace_array *tr) +static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_ftrace_function(&trace_ops); + + if (!graph) + unregister_ftrace_function(&trace_ops); + else + unregister_ftrace_graph(); } static void __irqsoff_tracer_init(struct trace_array *tr) @@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr) /* make sure that the tracer is visible */ smp_wmb(); tracing_reset_online_cpus(tr); - start_irqsoff_tracer(tr); + + if (start_irqsoff_tracer(tr, is_graph())) + printk(KERN_ERR "failed to start irqsoff tracer\n"); } static void irqsoff_tracer_reset(struct trace_array *tr) { - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr, is_graph()); if (!save_lat_flag) trace_flags &= ~TRACE_ITER_LATENCY_FMT; @@ -409,9 +640,15 @@ static struct tracer irqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -435,9 +672,15 @@ static struct tracer preemptoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -463,9 +706,15 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .start = irqsoff_tracer_start, .stop = irqsoff_tracer_stop, .print_max = 1, + .print_header = irqsoff_print_header, + .print_line = irqsoff_print_line, + .flags = &tracer_flags, + .set_flag = irqsoff_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif + .open = irqsoff_trace_open, + .close = irqsoff_trace_close, }; # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 8e46b3323cdc..2404c129a8c9 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -253,7 +253,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) void *ret; if (s->full) - return 0; + return NULL; if (len > ((PAGE_SIZE - 1) - s->len)) { s->full = 1; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 71fa771ee4d7..250e7f9bd2f0 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -255,7 +255,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* Maximum number of functions to trace before diagnosing a hang */ #define GRAPH_MAX_FUNC_TEST 100000000 -static void __ftrace_dump(bool disable_tracing); +static void +__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ @@ -266,7 +267,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) ftrace_graph_stop(); printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); if (ftrace_dump_on_oops) - __ftrace_dump(false); + __ftrace_dump(false, DUMP_ALL); return 0; }