tracing: Add __tracepoint_string() to export string pointers

There are several tracepoints (mostly in RCU), that reference a string pointer and uses the print format of "%s" to display the string that exists in the kernel, instead of copying the actual string to the ring buffer (saves time and ring buffer space). But this has an issue with userspace tools that read the binary buffers that has the address of the string but has no access to what the string itself is. The end result is just output that looks like: rcu_dyntick: ffffffff818adeaa 1 0 rcu_dyntick: ffffffff818adeb5 0 140000000000000 rcu_dyntick: ffffffff818adeb5 0 140000000000000 rcu_utilization: ffffffff8184333b rcu_utilization: ffffffff8184333b The above is pretty useless when read by the userspace tools. Ideally we would want something that looks like this: rcu_dyntick: Start 1 0 rcu_dyntick: End 0 140000000000000 rcu_dyntick: Start 140000000000000 0 rcu_callback: rcu_preempt rhp=0xffff880037aff710 func=put_cred_rcu 0/4 rcu_callback: rcu_preempt rhp=0xffff880078961980 func=file_free_rcu 0/5 rcu_dyntick: End 0 1 The trace_printk() which also only stores the address of the string format instead of recording the string into the buffer itself, exports the mapping of kernel addresses to format strings via the printk_format file in the debugfs tracing directory. The tracepoint strings can use this same method and output the format to the same file and the userspace tools will be able to decipher the address without any modification. The tracepoint strings need its own section to save the strings because the trace_printk section will cause the trace_printk() buffers to be allocated if anything exists within the section. trace_printk() is only used for debugging and should never exist in the kernel, we can not use the trace_printk sections. Add a new tracepoint_str section that will also be examined by the output of the printk_format file. Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2013-07-12 17:07:27 -04:00 · 2013-07-12 17:07:27 -04:00 · 102c9323c3
parent 3b2f64d00c
commit 102c9323c3
4 changed files with 62 additions and 1 deletions
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@ -122,8 +122,12 @@
 #define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;      \
 			 *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \
 			 VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
+#define TRACEPOINT_STR() VMLINUX_SYMBOL(__start___tracepoint_str) = .;	\
+			 *(__tracepoint_str) /* Trace_printk fmt' pointer */ \
+			 VMLINUX_SYMBOL(__stop___tracepoint_str) = .;
 #else
 #define TRACE_PRINTKS()
+#define TRACEPOINT_STR()
 #endif

 #ifdef CONFIG_FTRACE_SYSCALLS
@ -190,7 +194,8 @@
 	VMLINUX_SYMBOL(__stop___verbose) = .;				\
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
-	TRACE_PRINTKS()
+	TRACE_PRINTKS()							\
+	TRACEPOINT_STR()

 /*
 * Data section helpers
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@ -357,6 +357,40 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)

+/**
+ * tracepoint_string - register constant persistent string to trace system
+ * @str - a constant persistent string that will be referenced in tracepoints
+ *
+ * If constant strings are being used in tracepoints, it is faster and
+ * more efficient to just save the pointer to the string and reference
+ * that with a printf "%s" instead of saving the string in the ring buffer
+ * and wasting space and time.
+ *
+ * The problem with the above approach is that userspace tools that read
+ * the binary output of the trace buffers do not have access to the string.
+ * Instead they just show the address of the string which is not very
+ * useful to users.
+ *
+ * With tracepoint_string(), the string will be registered to the tracing
+ * system and exported to userspace via the debugfs/tracing/printk_formats
+ * file that maps the string address to the string text. This way userspace
+ * tools that read the binary buffers have a way to map the pointers to
+ * the ASCII strings they represent.
+ *
+ * The @str used must be a constant string and persistent as it would not
+ * make sense to show a string that no longer exists. But it is still fine
+ * to be used with modules, because when modules are unloaded, if they
+ * had tracepoints, the ring buffers are cleared too. As long as the string
+ * does not change during the life of the module, it is fine to use
+ * tracepoint_string() within a module.
+ */
+#define tracepoint_string(str)						\
+	({								\
+		static const char *___tp_str __tracepoint_string = str; \
+		___tp_str;						\
+	})
+#define __tracepoint_string	__attribute__((section("__tracepoint_str")))
+
 #ifdef CONFIG_PERF_EVENTS
 struct perf_event;

--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@ -1022,6 +1022,9 @@ extern struct list_head ftrace_events;
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];

+extern const char *__start___tracepoint_str[];
+extern const char *__stop___tracepoint_str[];
+
 void trace_printk_init_buffers(void);
 void trace_printk_start_comm(void);
 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@ -244,12 +244,31 @@ static const char **find_next(void *v, loff_t *pos)
 {
 	const char **fmt = v;
 	int start_index;
+	int last_index;

 	start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;

 	if (*pos < start_index)
 		return __start___trace_bprintk_fmt + *pos;

+	/*
+	 * The __tracepoint_str section is treated the same as the
+	 * __trace_printk_fmt section. The difference is that the
+	 * __trace_printk_fmt section should only be used by trace_printk()
+	 * in a debugging environment, as if anything exists in that section
+	 * the trace_prink() helper buffers are allocated, which would just
+	 * waste space in a production environment.
+	 *
+	 * The __tracepoint_str sections on the other hand are used by
+	 * tracepoints which need to map pointers to their strings to
+	 * the ASCII text for userspace.
+	 */
+	last_index = start_index;
+	start_index = __stop___tracepoint_str - __start___tracepoint_str;
+
+	if (*pos < last_index + start_index)
+		return __start___tracepoint_str + (*pos - last_index);
+
 	return find_next_mod_format(start_index, v, fmt, pos);
 }