static_call: Add inline static call infrastructure

Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE
option, which is a faster version of CONFIG_HAVE_STATIC_CALL.  At
runtime, the static call sites are patched directly, rather than using
the out-of-line trampolines.

Compared to out-of-line static calls, the performance benefits are more
modest, but still measurable.  Steven Rostedt did some tracepoint
measurements:

  https://lkml.kernel.org/r/20181126155405.72b4f718@gandalf.local.home

This code is heavily inspired by the jump label code (aka "static
jumps"), as some of the concepts are very similar.

For more details, see the comments in include/linux/static_call.h.

[peterz: simplified interface; merged trampolines]

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135804.684334440@infradead.org
This commit is contained in:
Josh Poimboeuf 2020-08-18 15:57:42 +02:00 committed by Ingo Molnar
parent 115284d89a
commit 9183c3f9ed
8 changed files with 373 additions and 1 deletions

View File

@ -978,6 +978,10 @@ config ARCH_HAS_VDSO_DATA
config HAVE_STATIC_CALL
bool
config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"

View File

@ -388,6 +388,12 @@
KEEP(*(__jump_table)) \
__stop___jump_table = .;
#define STATIC_CALL_DATA \
. = ALIGN(8); \
__start_static_call_sites = .; \
KEEP(*(.static_call_sites)) \
__stop_static_call_sites = .;
/*
* Allow architectures to handle ro_after_init data on their
* own by defining an empty RO_AFTER_INIT_DATA.
@ -398,6 +404,7 @@
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
STATIC_CALL_DATA \
__end_ro_after_init = .;
#endif

View File

@ -25,6 +25,7 @@
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
#include <linux/srcu.h>
#include <linux/static_call_types.h>
#include <linux/percpu.h>
#include <asm/module.h>
@ -498,6 +499,10 @@ struct module {
unsigned long *kprobe_blacklist;
unsigned int num_kprobe_blacklist;
#endif
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
int num_static_call_sites;
struct static_call_site *static_call_sites;
#endif
#ifdef CONFIG_LIVEPATCH
bool klp; /* Is this a livepatch module? */

View File

@ -95,7 +95,41 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func);
STATIC_CALL_TRAMP_ADDR(name), func); \
})
#if defined(CONFIG_HAVE_STATIC_CALL)
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
struct static_call_mod {
struct static_call_mod *next;
struct module *mod; /* for vmlinux, mod == NULL */
struct static_call_site *sites;
};
struct static_call_key {
void *func;
struct static_call_mod *mods;
};
extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
extern int static_call_mod_init(struct module *mod);
#define DEFINE_STATIC_CALL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = _func, \
.mods = NULL, \
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
#define static_call(name) __static_call(name)
#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
#define EXPORT_STATIC_CALL_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
#elif defined(CONFIG_HAVE_STATIC_CALL)
struct static_call_key {
void *func;

View File

@ -2,14 +2,27 @@
#ifndef _STATIC_CALL_TYPES_H
#define _STATIC_CALL_TYPES_H
#include <linux/types.h>
#include <linux/stringify.h>
#define STATIC_CALL_KEY_PREFIX __SCK__
#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
#define STATIC_CALL_TRAMP_PREFIX __SCT__
#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
/*
* The static call site table needs to be created by external tooling (objtool
* or a compiler plugin).
*/
struct static_call_site {
s32 addr;
s32 key;
};
#endif /* _STATIC_CALL_TYPES_H */

View File

@ -111,6 +111,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
obj-$(CONFIG_KCSAN) += kcsan/
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
obj-$(CONFIG_PERF_EVENTS) += events/

View File

@ -3274,6 +3274,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
sizeof(unsigned long),
&mod->num_kprobe_blacklist);
#endif
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
mod->static_call_sites = section_objs(info, ".static_call_sites",
sizeof(*mod->static_call_sites),
&mod->num_static_call_sites);
#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);

303
kernel/static_call.c Normal file
View File

@ -0,0 +1,303 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/static_call.h>
#include <linux/bug.h>
#include <linux/smp.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/processor.h>
#include <asm/sections.h>
extern struct static_call_site __start_static_call_sites[],
__stop_static_call_sites[];
static bool static_call_initialized;
#define STATIC_CALL_INIT 1UL
/* mutex to protect key modules/sites */
static DEFINE_MUTEX(static_call_mutex);
static void static_call_lock(void)
{
mutex_lock(&static_call_mutex);
}
static void static_call_unlock(void)
{
mutex_unlock(&static_call_mutex);
}
static inline void *static_call_addr(struct static_call_site *site)
{
return (void *)((long)site->addr + (long)&site->addr);
}
static inline struct static_call_key *static_call_key(const struct static_call_site *site)
{
return (struct static_call_key *)
(((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT);
}
/* These assume the key is word-aligned. */
static inline bool static_call_is_init(struct static_call_site *site)
{
return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT;
}
static inline void static_call_set_init(struct static_call_site *site)
{
site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) -
(long)&site->key;
}
static int static_call_site_cmp(const void *_a, const void *_b)
{
const struct static_call_site *a = _a;
const struct static_call_site *b = _b;
const struct static_call_key *key_a = static_call_key(a);
const struct static_call_key *key_b = static_call_key(b);
if (key_a < key_b)
return -1;
if (key_a > key_b)
return 1;
return 0;
}
static void static_call_site_swap(void *_a, void *_b, int size)
{
long delta = (unsigned long)_a - (unsigned long)_b;
struct static_call_site *a = _a;
struct static_call_site *b = _b;
struct static_call_site tmp = *a;
a->addr = b->addr - delta;
a->key = b->key - delta;
b->addr = tmp.addr + delta;
b->key = tmp.key + delta;
}
static inline void static_call_sort_entries(struct static_call_site *start,
struct static_call_site *stop)
{
sort(start, stop - start, sizeof(struct static_call_site),
static_call_site_cmp, static_call_site_swap);
}
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
struct static_call_site *site, *stop;
struct static_call_mod *site_mod;
cpus_read_lock();
static_call_lock();
if (key->func == func)
goto done;
key->func = func;
arch_static_call_transform(NULL, tramp, func);
/*
* If uninitialized, we'll not update the callsites, but they still
* point to the trampoline and we just patched that.
*/
if (WARN_ON_ONCE(!static_call_initialized))
goto done;
for (site_mod = key->mods; site_mod; site_mod = site_mod->next) {
struct module *mod = site_mod->mod;
if (!site_mod->sites) {
/*
* This can happen if the static call key is defined in
* a module which doesn't use it.
*/
continue;
}
stop = __stop_static_call_sites;
#ifdef CONFIG_MODULES
if (mod) {
stop = mod->static_call_sites +
mod->num_static_call_sites;
}
#endif
for (site = site_mod->sites;
site < stop && static_call_key(site) == key; site++) {
void *site_addr = static_call_addr(site);
if (static_call_is_init(site)) {
/*
* Don't write to call sites which were in
* initmem and have since been freed.
*/
if (!mod && system_state >= SYSTEM_RUNNING)
continue;
if (mod && !within_module_init((unsigned long)site_addr, mod))
continue;
}
if (!kernel_text_address((unsigned long)site_addr)) {
WARN_ONCE(1, "can't patch static call site at %pS",
site_addr);
continue;
}
arch_static_call_transform(site_addr, NULL, func);
}
}
done:
static_call_unlock();
cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(__static_call_update);
static int __static_call_init(struct module *mod,
struct static_call_site *start,
struct static_call_site *stop)
{
struct static_call_site *site;
struct static_call_key *key, *prev_key = NULL;
struct static_call_mod *site_mod;
if (start == stop)
return 0;
static_call_sort_entries(start, stop);
for (site = start; site < stop; site++) {
void *site_addr = static_call_addr(site);
if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
(!mod && init_section_contains(site_addr, 1)))
static_call_set_init(site);
key = static_call_key(site);
if (key != prev_key) {
prev_key = key;
site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
if (!site_mod)
return -ENOMEM;
site_mod->mod = mod;
site_mod->sites = site;
site_mod->next = key->mods;
key->mods = site_mod;
}
arch_static_call_transform(site_addr, NULL, key->func);
}
return 0;
}
#ifdef CONFIG_MODULES
static int static_call_add_module(struct module *mod)
{
return __static_call_init(mod, mod->static_call_sites,
mod->static_call_sites + mod->num_static_call_sites);
}
static void static_call_del_module(struct module *mod)
{
struct static_call_site *start = mod->static_call_sites;
struct static_call_site *stop = mod->static_call_sites +
mod->num_static_call_sites;
struct static_call_key *key, *prev_key = NULL;
struct static_call_mod *site_mod, **prev;
struct static_call_site *site;
for (site = start; site < stop; site++) {
key = static_call_key(site);
if (key == prev_key)
continue;
prev_key = key;
for (prev = &key->mods, site_mod = key->mods;
site_mod && site_mod->mod != mod;
prev = &site_mod->next, site_mod = site_mod->next)
;
if (!site_mod)
continue;
*prev = site_mod->next;
kfree(site_mod);
}
}
static int static_call_module_notify(struct notifier_block *nb,
unsigned long val, void *data)
{
struct module *mod = data;
int ret = 0;
cpus_read_lock();
static_call_lock();
switch (val) {
case MODULE_STATE_COMING:
ret = static_call_add_module(mod);
if (ret) {
WARN(1, "Failed to allocate memory for static calls");
static_call_del_module(mod);
}
break;
case MODULE_STATE_GOING:
static_call_del_module(mod);
break;
}
static_call_unlock();
cpus_read_unlock();
return notifier_from_errno(ret);
}
static struct notifier_block static_call_module_nb = {
.notifier_call = static_call_module_notify,
};
#endif /* CONFIG_MODULES */
static void __init static_call_init(void)
{
int ret;
if (static_call_initialized)
return;
cpus_read_lock();
static_call_lock();
ret = __static_call_init(NULL, __start_static_call_sites,
__stop_static_call_sites);
static_call_unlock();
cpus_read_unlock();
if (ret) {
pr_err("Failed to allocate memory for static_call!\n");
BUG();
}
static_call_initialized = true;
#ifdef CONFIG_MODULES
register_module_notifier(&static_call_module_nb);
#endif
}
early_initcall(static_call_init);