From 70d932985757fbe978024db313001218e9f8fe5c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:36 +0200 Subject: [PATCH 01/21] notifier: Fix broken error handling pattern The current notifiers have the following error handling pattern all over the place: int err, nr; err = __foo_notifier_call_chain(&chain, val_up, v, -1, &nr); if (err & NOTIFIER_STOP_MASK) __foo_notifier_call_chain(&chain, val_down, v, nr-1, NULL) And aside from the endless repetition thereof, it is broken. Consider blocking notifiers; both calls take and drop the rwsem, this means that the notifier list can change in between the two calls, making @nr meaningless. Fix this by replacing all the __foo_notifier_call_chain() functions with foo_notifier_call_chain_robust() that embeds the above pattern, but ensures it is inside a single lock region. Note: I switched atomic_notifier_call_chain_robust() to use the spinlock, since RCU cannot provide the guarantee required for the recovery. Note: software_resume() error handling was broken afaict. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200818135804.325626653@infradead.org --- include/linux/notifier.h | 15 ++- kernel/cpu_pm.c | 48 ++++------ kernel/notifier.c | 144 ++++++++++++++++++----------- kernel/power/hibernate.c | 39 ++++---- kernel/power/main.c | 8 +- kernel/power/power.h | 3 +- kernel/power/suspend.c | 14 ++- kernel/power/user.c | 14 +-- tools/power/pm-graph/sleepgraph.py | 2 +- 9 files changed, 147 insertions(+), 140 deletions(-) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 018947611483..2fb373a5c1ed 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -161,20 +161,19 @@ extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); -extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); -extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); -extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); -extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); + +extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 44a259338e33..f7e1d0eccdbc 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -15,18 +15,28 @@ static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain); -static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls) +static int cpu_pm_notify(enum cpu_pm_event event) { int ret; /* - * __atomic_notifier_call_chain has a RCU read critical section, which + * atomic_notifier_call_chain has a RCU read critical section, which * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let * RCU know this. */ rcu_irq_enter_irqson(); - ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, - nr_to_call, nr_calls); + ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL); + rcu_irq_exit_irqson(); + + return notifier_to_errno(ret); +} + +static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down) +{ + int ret; + + rcu_irq_enter_irqson(); + ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL); rcu_irq_exit_irqson(); return notifier_to_errno(ret); @@ -80,18 +90,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); */ int cpu_pm_enter(void) { - int nr_calls = 0; - int ret = 0; - - ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls); - if (ret) - /* - * Inform listeners (nr_calls - 1) about failure of CPU PM - * PM entry who are notified earlier to prepare for it. - */ - cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL); - - return ret; + return cpu_pm_notify_robust(CPU_PM_ENTER, CPU_PM_ENTER_FAILED); } EXPORT_SYMBOL_GPL(cpu_pm_enter); @@ -109,7 +108,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter); */ int cpu_pm_exit(void) { - return cpu_pm_notify(CPU_PM_EXIT, -1, NULL); + return cpu_pm_notify(CPU_PM_EXIT); } EXPORT_SYMBOL_GPL(cpu_pm_exit); @@ -131,18 +130,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit); */ int cpu_cluster_pm_enter(void) { - int nr_calls = 0; - int ret = 0; - - ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls); - if (ret) - /* - * Inform listeners (nr_calls - 1) about failure of CPU cluster - * PM entry who are notified earlier to prepare for it. - */ - cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL); - - return ret; + return cpu_pm_notify_robust(CPU_CLUSTER_PM_ENTER, CPU_CLUSTER_PM_ENTER_FAILED); } EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); @@ -163,7 +151,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); */ int cpu_cluster_pm_exit(void) { - return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); + return cpu_pm_notify(CPU_CLUSTER_PM_EXIT); } EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit); diff --git a/kernel/notifier.c b/kernel/notifier.c index 84c987dfbe03..1b019cbca594 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -94,6 +94,34 @@ static int notifier_call_chain(struct notifier_block **nl, } NOKPROBE_SYMBOL(notifier_call_chain); +/** + * notifier_call_chain_robust - Inform the registered notifiers about an event + * and rollback on error. + * @nl: Pointer to head of the blocking notifier chain + * @val_up: Value passed unmodified to the notifier function + * @val_down: Value passed unmodified to the notifier function when recovering + * from an error on @val_up + * @v Pointer passed unmodified to the notifier function + * + * NOTE: It is important the @nl chain doesn't change between the two + * invocations of notifier_call_chain() such that we visit the + * exact same notifier callbacks; this rules out any RCU usage. + * + * Returns: the return value of the @val_up call. + */ +static int notifier_call_chain_robust(struct notifier_block **nl, + unsigned long val_up, unsigned long val_down, + void *v) +{ + int ret, nr = 0; + + ret = notifier_call_chain(nl, val_up, v, -1, &nr); + if (ret & NOTIFY_STOP_MASK) + notifier_call_chain(nl, val_down, v, nr-1, NULL); + + return ret; +} + /* * Atomic notifier chain routines. Registration and unregistration * use a spinlock, and call_chain is synchronized by RCU (no locks). @@ -144,13 +172,30 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, } EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); +int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + unsigned long flags; + int ret; + + /* + * Musn't use RCU; because then the notifier list can + * change between the up and down traversal. + */ + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); + spin_unlock_irqrestore(&nh->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust); +NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust); + /** - * __atomic_notifier_call_chain - Call functions in an atomic notifier chain + * atomic_notifier_call_chain - Call functions in an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See the comment for notifier_call_chain. - * @nr_calls: See the comment for notifier_call_chain. * * Calls each function in a notifier chain in turn. The functions * run in an atomic context, so they must not block. @@ -163,24 +208,16 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) { int ret; rcu_read_lock(); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); -NOKPROBE_SYMBOL(__atomic_notifier_call_chain); -int atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v) -{ - return __atomic_notifier_call_chain(nh, val, v, -1, NULL); + return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); @@ -250,13 +287,30 @@ int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, } EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); +int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + int ret = NOTIFY_DONE; + + /* + * We check the head outside the lock, but if this access is + * racy then it does not matter what the result of the test + * is, we re-check the list after having taken the lock anyway: + */ + if (rcu_access_pointer(nh->head)) { + down_read(&nh->rwsem); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); + up_read(&nh->rwsem); + } + return ret; +} +EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_robust); + /** - * __blocking_notifier_call_chain - Call functions in a blocking notifier chain + * blocking_notifier_call_chain - Call functions in a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain. * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. @@ -268,9 +322,8 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v) { int ret = NOTIFY_DONE; @@ -281,19 +334,11 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, */ if (rcu_access_pointer(nh->head)) { down_read(&nh->rwsem); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, - nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); up_read(&nh->rwsem); } return ret; } -EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain); - -int blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v) -{ - return __blocking_notifier_call_chain(nh, val, v, -1, NULL); -} EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); /* @@ -335,13 +380,18 @@ int raw_notifier_chain_unregister(struct raw_notifier_head *nh, } EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); +int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v) +{ + return notifier_call_chain_robust(&nh->head, val_up, val_down, v); +} +EXPORT_SYMBOL_GPL(raw_notifier_call_chain_robust); + /** - * __raw_notifier_call_chain - Call functions in a raw notifier chain + * raw_notifier_call_chain - Call functions in a raw notifier chain * @nh: Pointer to head of the raw notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain * * Calls each function in a notifier chain in turn. The functions * run in an undefined context. @@ -354,18 +404,10 @@ EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __raw_notifier_call_chain(struct raw_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) -{ - return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); -} -EXPORT_SYMBOL_GPL(__raw_notifier_call_chain); - int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v) { - return __raw_notifier_call_chain(nh, val, v, -1, NULL); + return notifier_call_chain(&nh->head, val, v, -1, NULL); } EXPORT_SYMBOL_GPL(raw_notifier_call_chain); @@ -437,12 +479,10 @@ int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); /** - * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain + * srcu_notifier_call_chain - Call functions in an SRCU notifier chain * @nh: Pointer to head of the SRCU notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function - * @nr_to_call: See comment for notifier_call_chain. - * @nr_calls: See comment for notifier_call_chain * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. @@ -454,25 +494,17 @@ EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int srcu_notifier_call_chain(struct srcu_notifier_head *nh, + unsigned long val, void *v) { int ret; int idx; idx = srcu_read_lock(&nh->srcu); - ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); + ret = notifier_call_chain(&nh->head, val, v, -1, NULL); srcu_read_unlock(&nh->srcu, idx); return ret; } -EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain); - -int srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v) -{ - return __srcu_notifier_call_chain(nh, val, v, -1, NULL); -} EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); /** diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e7aa57fb2fdc..1dee70815f3c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -706,8 +706,8 @@ static int load_image_and_restore(void) */ int hibernate(void) { - int error, nr_calls = 0; bool snapshot_test = false; + int error; if (!hibernation_available()) { pm_pr_dbg("Hibernation not available.\n"); @@ -723,11 +723,9 @@ int hibernate(void) pr_info("hibernation entry\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Exit; - } + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto Restore; ksys_sync_helper(); @@ -785,7 +783,8 @@ int hibernate(void) /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_HIBERNATION); + Restore: pm_restore_console(); hibernate_release(); Unlock: @@ -804,7 +803,7 @@ int hibernate(void) */ int hibernate_quiet_exec(int (*func)(void *data), void *data) { - int error, nr_calls = 0; + int error; lock_system_sleep(); @@ -815,11 +814,9 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) pm_prepare_console(); - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto exit; - } + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto restore; error = freeze_processes(); if (error) @@ -880,8 +877,9 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) thaw_processes(); exit: - __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_HIBERNATION); +restore: pm_restore_console(); hibernate_release(); @@ -910,7 +908,7 @@ EXPORT_SYMBOL_GPL(hibernate_quiet_exec); */ static int software_resume(void) { - int error, nr_calls = 0; + int error; /* * If the user said "noresume".. bail out early. @@ -997,11 +995,9 @@ static int software_resume(void) pr_info("resume from hibernation\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Close_Finish; - } + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); + if (error) + goto Restore; pm_pr_dbg("Preparing processes for hibernation restore.\n"); error = freeze_processes(); @@ -1017,7 +1013,8 @@ static int software_resume(void) error = load_image_and_restore(); thaw_processes(); Finish: - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_RESTORE); + Restore: pm_restore_console(); pr_info("resume failed (%d)\n", error); hibernate_release(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 40f86ec4ab30..0aefd6f57e0a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -80,18 +80,18 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) +int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; - ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, - nr_to_call, nr_calls); + ret = blocking_notifier_call_chain_robust(&pm_chain_head, val_up, val_down, NULL); return notifier_to_errno(ret); } + int pm_notifier_call_chain(unsigned long val) { - return __pm_notifier_call_chain(val, -1, NULL); + return blocking_notifier_call_chain(&pm_chain_head, val, NULL); } /* If set, devices may be suspended and resumed asynchronously. */ diff --git a/kernel/power/power.h b/kernel/power/power.h index 32fc89ac96c3..24f12d534515 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -210,8 +210,7 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ -extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, - int *nr_calls); +extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8b1bb5ee7e5d..32391acc806b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -342,18 +342,16 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error, nr_calls = 0; + int error; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Finish; - } + error = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND); + if (error) + goto Restore; trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -363,8 +361,8 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - Finish: - __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_SUSPEND); + Restore: pm_restore_console(); return error; } diff --git a/kernel/power/user.c b/kernel/power/user.c index d5eedc2baa2a..047f598f89a5 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -46,7 +46,7 @@ int is_hibernate_resume_dev(const struct inode *bd_inode) static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - int error, nr_calls = 0; + int error; if (!hibernation_available()) return -EPERM; @@ -73,9 +73,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; data->free_bitmaps = false; - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) - __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); } else { /* * Resuming. We may need to wait for the image device to @@ -85,15 +83,11 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } else - nr_calls--; - - if (error) - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + } } if (error) hibernate_release(); diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 46ff97e909c6..1bc36a1db14f 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -171,7 +171,7 @@ class SystemValues: tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, - '__pm_notifier_call_chain': {}, + 'pm_notifier_call_chain_robust': {}, 'pm_prepare_console': {}, 'pm_notifier_call_chain': {}, 'freeze_processes': {}, From 0340a6b7fb767f7f296b9bacc9a215920519a644 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:37 +0200 Subject: [PATCH 02/21] module: Fix up module_notifier return values While auditing all module notifiers I noticed a whole bunch of fail wrt the return value. Notifiers have a 'special' return semantics. As is; NOTIFY_DONE vs NOTIFY_OK is a bit vague; but notifier_from_errno(0) results in NOTIFY_OK and NOTIFY_DONE has a comment that says "Don't care". From this I've used NOTIFY_DONE when the function completely ignores the callback and notifier_to_error() isn't used. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Reviewed-by: Joel Fernandes (Google) Reviewed-by: Robert Richter Acked-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20200818135804.385360407@infradead.org --- drivers/oprofile/buffer_sync.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_printk.c | 4 ++-- kernel/tracepoint.c | 2 +- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 4d7695289eda..cc917865f13a 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -116,7 +116,7 @@ module_load_notify(struct notifier_block *self, unsigned long val, void *data) { #ifdef CONFIG_MODULES if (val != MODULE_STATE_COMING) - return 0; + return NOTIFY_DONE; /* FIXME: should we process all CPU buffers ? */ mutex_lock(&buffer_mutex); @@ -124,7 +124,7 @@ module_load_notify(struct notifier_block *self, unsigned long val, void *data) add_event_entry(MODULE_LOADED_CODE); mutex_unlock(&buffer_mutex); #endif - return 0; + return NOTIFY_OK; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a8d4f253ed77..2ecf7892a31b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2027,10 +2027,11 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op, { struct bpf_trace_module *btm, *tmp; struct module *mod = module; + int ret = 0; if (mod->num_bpf_raw_events == 0 || (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) - return 0; + goto out; mutex_lock(&bpf_module_mutex); @@ -2040,6 +2041,8 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op, if (btm) { btm->module = module; list_add(&btm->list, &bpf_trace_modules); + } else { + ret = -ENOMEM; } break; case MODULE_STATE_GOING: @@ -2055,7 +2058,8 @@ static int bpf_event_notify(struct notifier_block *nb, unsigned long op, mutex_unlock(&bpf_module_mutex); - return 0; +out: + return notifier_from_errno(ret); } static struct notifier_block bpf_module_nb = { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f40d850ebabc..df499922b6a4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9072,7 +9072,7 @@ static int trace_module_notify(struct notifier_block *self, break; } - return 0; + return NOTIFY_OK; } static struct notifier_block trace_module_nb = { diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a85effb2373b..beebf2cd364b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2646,7 +2646,7 @@ static int trace_module_notify(struct notifier_block *self, mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return 0; + return NOTIFY_OK; } static struct notifier_block trace_module_nb = { diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index d4e31e969206..bb7783b90361 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -96,7 +96,7 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self, if (val == MODULE_STATE_COMING) hold_module_trace_bprintk_format(start, end); } - return 0; + return NOTIFY_OK; } /* @@ -174,7 +174,7 @@ __init static int module_trace_bprintk_format_notify(struct notifier_block *self, unsigned long val, void *data) { - return 0; + return NOTIFY_OK; } static inline const char ** find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 73956eaff8a9..8e05ed2cd39e 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -521,7 +521,7 @@ static int tracepoint_module_notify(struct notifier_block *self, case MODULE_STATE_UNFORMED: break; } - return ret; + return notifier_from_errno(ret); } static struct notifier_block tracepoint_module_nb = { From 59cc8e0a906ea23190922e5e0252e5b5a60d70c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:38 +0200 Subject: [PATCH 03/21] module: Properly propagate MODULE_STATE_COMING failure Now that notifiers got unbroken; use the proper interface to handle notifier errors and propagate them. There were already MODULE_STATE_COMING notifiers that failed; notably: - jump_label_module_notifier() - tracepoint_module_notify() - bpf_event_notify() By propagating this error, we fix those users. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Acked-by: Jessica Yu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20200818135804.444372853@infradead.org --- kernel/module.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 1c5cff34d9f2..3c465cf31d08 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3792,9 +3792,13 @@ static int prepare_coming_module(struct module *mod) if (err) return err; - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_COMING, mod); - return 0; + err = blocking_notifier_call_chain_robust(&module_notify_list, + MODULE_STATE_COMING, MODULE_STATE_GOING, mod); + err = notifier_to_errno(err); + if (err) + klp_module_going(mod); + + return err; } static int unknown_module_param_cb(char *param, char *val, const char *modname, From 0db6e3734b130207026df1e78455fa98ca1d6f50 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:39 +0200 Subject: [PATCH 04/21] jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved() Nothing ensures the module exists while we're iterating mod->jump_entries in __jump_label_mod_text_reserved(), take a module reference to ensure the module sticks around. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20200818135804.504501338@infradead.org --- kernel/jump_label.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index cdb3ffab128b..e661c61b3d6b 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -539,19 +539,25 @@ static void static_key_set_mod(struct static_key *key, static int __jump_label_mod_text_reserved(void *start, void *end) { struct module *mod; + int ret; preempt_disable(); mod = __module_text_address((unsigned long)start); WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; preempt_enable(); if (!mod) return 0; - - return __jump_label_text_reserved(mod->jump_entries, + ret = __jump_label_text_reserved(mod->jump_entries, mod->jump_entries + mod->num_jump_entries, start, end); + + module_put(mod); + + return ret; } static void __jump_label_mod_update(struct static_key *key) From 563a02b0c9704f69c0364befedd451f57fe88092 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:40 +0200 Subject: [PATCH 05/21] compiler.h: Make __ADDRESSABLE() symbol truly unique The __ADDRESSABLE() macro uses the __LINE__ macro to create a temporary symbol which has a unique name. However, if the macro is used multiple times from within another macro, the line number will always be the same, resulting in duplicate symbols. Make the temporary symbols truly unique by using __UNIQUE_ID instead of __LINE__. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200818135804.564436253@infradead.org --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6810d80acb0b..92ef163a7479 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -207,7 +207,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __ADDRESSABLE(sym) \ static void * __section(.discard.addressable) __used \ - __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; /** * offset_to_ptr - convert a relative memory offset to an absolute pointer From 115284d89a436e9b66da0c6c4f6efded806874b2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:41 +0200 Subject: [PATCH 06/21] static_call: Add basic static call infrastructure Static calls are a replacement for global function pointers. They use code patching to allow direct calls to be used instead of indirect calls. They give the flexibility of function pointers, but with improved performance. This is especially important for cases where retpolines would otherwise be used, as retpolines can significantly impact performance. The concept and code are an extension of previous work done by Ard Biesheuvel and Steven Rostedt: https://lkml.kernel.org/r/20181005081333.15018-1-ard.biesheuvel@linaro.org https://lkml.kernel.org/r/20181006015110.653946300@goodmis.org There are two implementations, depending on arch support: 1) out-of-line: patched trampolines (CONFIG_HAVE_STATIC_CALL) 2) basic function pointers For more details, see the comments in include/linux/static_call.h. [peterz: simplified interface] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.623259796@infradead.org --- arch/Kconfig | 3 + include/linux/static_call.h | 156 ++++++++++++++++++++++++++++++ include/linux/static_call_types.h | 15 +++ 3 files changed, 174 insertions(+) create mode 100644 include/linux/static_call.h create mode 100644 include/linux/static_call_types.h diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..806e6df7b47c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -975,6 +975,9 @@ config HAVE_SPARSE_SYSCALL_NR config ARCH_HAS_VDSO_DATA bool +config HAVE_STATIC_CALL + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/linux/static_call.h b/include/linux/static_call.h new file mode 100644 index 000000000000..d8892dff2e91 --- /dev/null +++ b/include/linux/static_call.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_STATIC_CALL_H +#define _LINUX_STATIC_CALL_H + +/* + * Static call support + * + * Static calls use code patching to hard-code function pointers into direct + * branch instructions. They give the flexibility of function pointers, but + * with improved performance. This is especially important for cases where + * retpolines would otherwise be used, as retpolines can significantly impact + * performance. + * + * + * API overview: + * + * DECLARE_STATIC_CALL(name, func); + * DEFINE_STATIC_CALL(name, func); + * static_call(name)(args...); + * static_call_update(name, func); + * + * Usage example: + * + * # Start with the following functions (with identical prototypes): + * int func_a(int arg1, int arg2); + * int func_b(int arg1, int arg2); + * + * # Define a 'my_name' reference, associated with func_a() by default + * DEFINE_STATIC_CALL(my_name, func_a); + * + * # Call func_a() + * static_call(my_name)(arg1, arg2); + * + * # Update 'my_name' to point to func_b() + * static_call_update(my_name, &func_b); + * + * # Call func_b() + * static_call(my_name)(arg1, arg2); + * + * + * Implementation details: + * + * This requires some arch-specific code (CONFIG_HAVE_STATIC_CALL). + * Otherwise basic indirect calls are used (with function pointers). + * + * Each static_call() site calls into a trampoline associated with the name. + * The trampoline has a direct branch to the default function. Updates to a + * name will modify the trampoline's branch destination. + * + * If the arch has CONFIG_HAVE_STATIC_CALL_INLINE, then the call sites + * themselves will be patched at runtime to call the functions directly, + * rather than calling through the trampoline. This requires objtool or a + * compiler plugin to detect all the static_call() sites and annotate them + * in the .static_call_sites section. + */ + +#include +#include +#include + +#ifdef CONFIG_HAVE_STATIC_CALL +#include + +/* + * Either @site or @tramp can be NULL. + */ +extern void arch_static_call_transform(void *site, void *tramp, void *func); + +#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __static_call(name) \ +({ \ + __ADDRESSABLE(STATIC_CALL_KEY(name)); \ + &STATIC_CALL_TRAMP(name); \ +}) + +#else +#define STATIC_CALL_TRAMP_ADDR(name) NULL +#endif + + +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#define static_call_update(name, func) \ +({ \ + BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name))); \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), func); \ +}) + +#if defined(CONFIG_HAVE_STATIC_CALL) + +struct static_call_key { + void *func; +}; + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + }; \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + +#define static_call(name) __static_call(name) + +static inline +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + cpus_read_lock(); + WRITE_ONCE(key->func, func); + arch_static_call_transform(NULL, tramp, func); + cpus_read_unlock(); +} + +#define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) + +#define EXPORT_STATIC_CALL_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + +#else /* Generic implementation */ + +struct static_call_key { + void *func; +}; + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + } + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +static inline +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + WRITE_ONCE(key->func, func); +} + +#define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name)) +#define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + +#endif /* _LINUX_STATIC_CALL_H */ diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h new file mode 100644 index 000000000000..5ed249dc47d3 --- /dev/null +++ b/include/linux/static_call_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +#endif /* _STATIC_CALL_TYPES_H */ From 9183c3f9ed710a8edf1a61e8a96d497258d26e08 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:42 +0200 Subject: [PATCH 07/21] static_call: Add inline static call infrastructure Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE option, which is a faster version of CONFIG_HAVE_STATIC_CALL. At runtime, the static call sites are patched directly, rather than using the out-of-line trampolines. Compared to out-of-line static calls, the performance benefits are more modest, but still measurable. Steven Rostedt did some tracepoint measurements: https://lkml.kernel.org/r/20181126155405.72b4f718@gandalf.local.home This code is heavily inspired by the jump label code (aka "static jumps"), as some of the concepts are very similar. For more details, see the comments in include/linux/static_call.h. [peterz: simplified interface; merged trampolines] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.684334440@infradead.org --- arch/Kconfig | 4 + include/asm-generic/vmlinux.lds.h | 7 + include/linux/module.h | 5 + include/linux/static_call.h | 36 +++- include/linux/static_call_types.h | 13 ++ kernel/Makefile | 1 + kernel/module.c | 5 + kernel/static_call.c | 303 ++++++++++++++++++++++++++++++ 8 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 kernel/static_call.c diff --git a/arch/Kconfig b/arch/Kconfig index 806e6df7b47c..2c4936ac620a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -978,6 +978,10 @@ config ARCH_HAS_VDSO_DATA config HAVE_STATIC_CALL bool +config HAVE_STATIC_CALL_INLINE + bool + depends on HAVE_STATIC_CALL + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5430febd34be..0088a5cd6a40 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -388,6 +388,12 @@ KEEP(*(__jump_table)) \ __stop___jump_table = .; +#define STATIC_CALL_DATA \ + . = ALIGN(8); \ + __start_static_call_sites = .; \ + KEEP(*(.static_call_sites)) \ + __stop_static_call_sites = .; + /* * Allow architectures to handle ro_after_init data on their * own by defining an empty RO_AFTER_INIT_DATA. @@ -398,6 +404,7 @@ __start_ro_after_init = .; \ *(.data..ro_after_init) \ JUMP_TABLE_DATA \ + STATIC_CALL_DATA \ __end_ro_after_init = .; #endif diff --git a/include/linux/module.h b/include/linux/module.h index e30ed5fa33a7..a29187f7c360 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -498,6 +499,10 @@ struct module { unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + int num_static_call_sites; + struct static_call_site *static_call_sites; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index d8892dff2e91..0d7f9efaa3b2 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -95,7 +95,41 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func); STATIC_CALL_TRAMP_ADDR(name), func); \ }) -#if defined(CONFIG_HAVE_STATIC_CALL) +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +struct static_call_mod { + struct static_call_mod *next; + struct module *mod; /* for vmlinux, mod == NULL */ + struct static_call_site *sites; +}; + +struct static_call_key { + void *func; + struct static_call_mod *mods; +}; + +extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); +extern int static_call_mod_init(struct module *mod); + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + .mods = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + +#define static_call(name) __static_call(name) + +#define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) + +#define EXPORT_STATIC_CALL_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + +#elif defined(CONFIG_HAVE_STATIC_CALL) struct static_call_key { void *func; diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 5ed249dc47d3..408d345d83e1 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -2,14 +2,27 @@ #ifndef _STATIC_CALL_TYPES_H #define _STATIC_CALL_TYPES_H +#include #include #define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 9a20016d4900..b74820d8b264 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/module.c b/kernel/module.c index 3c465cf31d08..c075a18103fb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3274,6 +3274,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", sizeof(unsigned long), &mod->num_kprobe_blacklist); +#endif +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + mod->static_call_sites = section_objs(info, ".static_call_sites", + sizeof(*mod->static_call_sites), + &mod->num_static_call_sites); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/kernel/static_call.c b/kernel/static_call.c new file mode 100644 index 000000000000..d24349244675 --- /dev/null +++ b/kernel/static_call.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct static_call_site __start_static_call_sites[], + __stop_static_call_sites[]; + +static bool static_call_initialized; + +#define STATIC_CALL_INIT 1UL + +/* mutex to protect key modules/sites */ +static DEFINE_MUTEX(static_call_mutex); + +static void static_call_lock(void) +{ + mutex_lock(&static_call_mutex); +} + +static void static_call_unlock(void) +{ + mutex_unlock(&static_call_mutex); +} + +static inline void *static_call_addr(struct static_call_site *site) +{ + return (void *)((long)site->addr + (long)&site->addr); +} + + +static inline struct static_call_key *static_call_key(const struct static_call_site *site) +{ + return (struct static_call_key *) + (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT); +} + +/* These assume the key is word-aligned. */ +static inline bool static_call_is_init(struct static_call_site *site) +{ + return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT; +} + +static inline void static_call_set_init(struct static_call_site *site) +{ + site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) - + (long)&site->key; +} + +static int static_call_site_cmp(const void *_a, const void *_b) +{ + const struct static_call_site *a = _a; + const struct static_call_site *b = _b; + const struct static_call_key *key_a = static_call_key(a); + const struct static_call_key *key_b = static_call_key(b); + + if (key_a < key_b) + return -1; + + if (key_a > key_b) + return 1; + + return 0; +} + +static void static_call_site_swap(void *_a, void *_b, int size) +{ + long delta = (unsigned long)_a - (unsigned long)_b; + struct static_call_site *a = _a; + struct static_call_site *b = _b; + struct static_call_site tmp = *a; + + a->addr = b->addr - delta; + a->key = b->key - delta; + + b->addr = tmp.addr + delta; + b->key = tmp.key + delta; +} + +static inline void static_call_sort_entries(struct static_call_site *start, + struct static_call_site *stop) +{ + sort(start, stop - start, sizeof(struct static_call_site), + static_call_site_cmp, static_call_site_swap); +} + +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + struct static_call_site *site, *stop; + struct static_call_mod *site_mod; + + cpus_read_lock(); + static_call_lock(); + + if (key->func == func) + goto done; + + key->func = func; + + arch_static_call_transform(NULL, tramp, func); + + /* + * If uninitialized, we'll not update the callsites, but they still + * point to the trampoline and we just patched that. + */ + if (WARN_ON_ONCE(!static_call_initialized)) + goto done; + + for (site_mod = key->mods; site_mod; site_mod = site_mod->next) { + struct module *mod = site_mod->mod; + + if (!site_mod->sites) { + /* + * This can happen if the static call key is defined in + * a module which doesn't use it. + */ + continue; + } + + stop = __stop_static_call_sites; + +#ifdef CONFIG_MODULES + if (mod) { + stop = mod->static_call_sites + + mod->num_static_call_sites; + } +#endif + + for (site = site_mod->sites; + site < stop && static_call_key(site) == key; site++) { + void *site_addr = static_call_addr(site); + + if (static_call_is_init(site)) { + /* + * Don't write to call sites which were in + * initmem and have since been freed. + */ + if (!mod && system_state >= SYSTEM_RUNNING) + continue; + if (mod && !within_module_init((unsigned long)site_addr, mod)) + continue; + } + + if (!kernel_text_address((unsigned long)site_addr)) { + WARN_ONCE(1, "can't patch static call site at %pS", + site_addr); + continue; + } + + arch_static_call_transform(site_addr, NULL, func); + } + } + +done: + static_call_unlock(); + cpus_read_unlock(); +} +EXPORT_SYMBOL_GPL(__static_call_update); + +static int __static_call_init(struct module *mod, + struct static_call_site *start, + struct static_call_site *stop) +{ + struct static_call_site *site; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod; + + if (start == stop) + return 0; + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + void *site_addr = static_call_addr(site); + + if ((mod && within_module_init((unsigned long)site_addr, mod)) || + (!mod && init_section_contains(site_addr, 1))) + static_call_set_init(site); + + key = static_call_key(site); + if (key != prev_key) { + prev_key = key; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + + site_mod->mod = mod; + site_mod->sites = site; + site_mod->next = key->mods; + key->mods = site_mod; + } + + arch_static_call_transform(site_addr, NULL, key->func); + } + + return 0; +} + +#ifdef CONFIG_MODULES + +static int static_call_add_module(struct module *mod) +{ + return __static_call_init(mod, mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites); +} + +static void static_call_del_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = mod->static_call_sites + + mod->num_static_call_sites; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod, **prev; + struct static_call_site *site; + + for (site = start; site < stop; site++) { + key = static_call_key(site); + if (key == prev_key) + continue; + + prev_key = key; + + for (prev = &key->mods, site_mod = key->mods; + site_mod && site_mod->mod != mod; + prev = &site_mod->next, site_mod = site_mod->next) + ; + + if (!site_mod) + continue; + + *prev = site_mod->next; + kfree(site_mod); + } +} + +static int static_call_module_notify(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + cpus_read_lock(); + static_call_lock(); + + switch (val) { + case MODULE_STATE_COMING: + ret = static_call_add_module(mod); + if (ret) { + WARN(1, "Failed to allocate memory for static calls"); + static_call_del_module(mod); + } + break; + case MODULE_STATE_GOING: + static_call_del_module(mod); + break; + } + + static_call_unlock(); + cpus_read_unlock(); + + return notifier_from_errno(ret); +} + +static struct notifier_block static_call_module_nb = { + .notifier_call = static_call_module_notify, +}; + +#endif /* CONFIG_MODULES */ + +static void __init static_call_init(void) +{ + int ret; + + if (static_call_initialized) + return; + + cpus_read_lock(); + static_call_lock(); + ret = __static_call_init(NULL, __start_static_call_sites, + __stop_static_call_sites); + static_call_unlock(); + cpus_read_unlock(); + + if (ret) { + pr_err("Failed to allocate memory for static_call!\n"); + BUG(); + } + + static_call_initialized = true; + +#ifdef CONFIG_MODULES + register_module_notifier(&static_call_module_nb); +#endif +} +early_initcall(static_call_init); From 6333e8f73b834f54e395a056e6002403f0862c51 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:43 +0200 Subject: [PATCH 08/21] static_call: Avoid kprobes on inline static_call()s Similar to how we disallow kprobes on any other dynamic text (ftrace/jump_label) also disallow kprobes on inline static_call()s. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135804.744920586@infradead.org --- arch/x86/kernel/kprobes/opt.c | 4 ++- include/linux/static_call.h | 11 ++++++ kernel/kprobes.c | 2 ++ kernel/static_call.c | 68 +++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 40f380461e6d..c068e21c2c40 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -210,7 +211,8 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) + jump_label_text_reserved(src, src + len - 1) || + static_call_text_reserved(src, src + len - 1)) return -EBUSY; return len; diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0d7f9efaa3b2..6f62ceda7dd9 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -110,6 +110,7 @@ struct static_call_key { extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); extern int static_call_mod_init(struct module *mod); +extern int static_call_text_reserved(void *start, void *end); #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -153,6 +154,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) cpus_read_unlock(); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) @@ -182,6 +188,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) WRITE_ONCE(key->func, func); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name)) #define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..67e6a8c18007 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1634,6 +1635,7 @@ static int check_kprobe_address_safe(struct kprobe *p, if (!kernel_text_address((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || + static_call_text_reserved(p->addr, p->addr) || find_bug((unsigned long)p->addr)) { ret = -EINVAL; goto out; diff --git a/kernel/static_call.c b/kernel/static_call.c index d24349244675..753b2f1b4fb8 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -204,8 +204,58 @@ static int __static_call_init(struct module *mod, return 0; } +static int addr_conflict(struct static_call_site *site, void *start, void *end) +{ + unsigned long addr = (unsigned long)static_call_addr(site); + + if (addr <= (unsigned long)end && + addr + CALL_INSN_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +static int __static_call_text_reserved(struct static_call_site *iter_start, + struct static_call_site *iter_stop, + void *start, void *end) +{ + struct static_call_site *iter = iter_start; + + while (iter < iter_stop) { + if (addr_conflict(iter, start, end)) + return 1; + iter++; + } + + return 0; +} + #ifdef CONFIG_MODULES +static int __static_call_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + int ret; + + preempt_disable(); + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + preempt_enable(); + + if (!mod) + return 0; + + ret = __static_call_text_reserved(mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites, + start, end); + + module_put(mod); + + return ret; +} + static int static_call_add_module(struct module *mod) { return __static_call_init(mod, mod->static_call_sites, @@ -273,8 +323,26 @@ static struct notifier_block static_call_module_nb = { .notifier_call = static_call_module_notify, }; +#else + +static inline int __static_call_mod_text_reserved(void *start, void *end) +{ + return 0; +} + #endif /* CONFIG_MODULES */ +int static_call_text_reserved(void *start, void *end) +{ + int ret = __static_call_text_reserved(__start_static_call_sites, + __stop_static_call_sites, start, end); + + if (ret) + return ret; + + return __static_call_mod_text_reserved(start, end); +} + static void __init static_call_init(void) { int ret; From e6d6c071f22de29e4993784fc00cd2202b7ba149 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:44 +0200 Subject: [PATCH 09/21] x86/static_call: Add out-of-line static call implementation Add the x86 out-of-line static call implementation. For each key, a permanent trampoline is created which is the destination for all static calls for the given key. The trampoline has a direct jump which gets patched by static_call_update() when the destination function changes. [peterz: fixed trampoline, rewrote patching code] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.804315175@infradead.org --- arch/x86/Kconfig | 1 + arch/x86/include/asm/static_call.h | 23 ++++++++++++++++++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/static_call.c | 31 ++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 arch/x86/include/asm/static_call.h create mode 100644 arch/x86/kernel/static_call.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7101ac64bb20..595c06b32b3a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -215,6 +215,7 @@ config X86 select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 + select HAVE_STATIC_CALL select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h new file mode 100644 index 000000000000..07aa8791cbfe --- /dev/null +++ b/arch/x86/include/asm/static_call.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_STATIC_CALL_H +#define _ASM_STATIC_CALL_H + +#include + +/* + * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which + * does a direct jump to the function. The direct jump gets patched by + * static_call_update(). + */ +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + asm(".pushsection .text, \"ax\" \n" \ + ".align 4 \n" \ + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + " .byte 0xe9 # jmp.d32 \n" \ + " .long " #func " - (. + 4) \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") + +#endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e77261db2391..de09af019e23 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -68,6 +68,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o obj-y += irqflags.o +obj-y += static_call.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c new file mode 100644 index 000000000000..0565825970af --- /dev/null +++ b/arch/x86/kernel/static_call.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +static void __static_call_transform(void *insn, u8 opcode, void *func) +{ + const void *code = text_gen_insn(opcode, insn, func); + + if (WARN_ONCE(*(u8 *)insn != opcode, + "unexpected static call insn opcode 0x%x at %pS\n", + opcode, insn)) + return; + + if (memcmp(insn, code, CALL_INSN_SIZE) == 0) + return; + + text_poke_bp(insn, code, CALL_INSN_SIZE, NULL); +} + +void arch_static_call_transform(void *site, void *tramp, void *func) +{ + mutex_lock(&text_mutex); + + if (tramp) + __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + + mutex_unlock(&text_mutex); +} +EXPORT_SYMBOL_GPL(arch_static_call_transform); From 1e7e47883830aae5e8246a22ca2fc6883c61acdf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:45 +0200 Subject: [PATCH 10/21] x86/static_call: Add inline static call implementation for x86-64 Add the inline static call implementation for x86-64. The generated code is identical to the out-of-line case, except we move the trampoline into it's own section. Objtool uses the trampoline naming convention to detect all the call sites. It then annotates those call sites in the .static_call_sites section. During boot (and module init), the call sites are patched to call directly into the destination function. The temporary trampoline is then no longer used. [peterz: merged trampolines, put trampoline in section] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.864271425@infradead.org --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/static_call.h | 13 ++- arch/x86/kernel/static_call.c | 3 + arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 6 ++ tools/include/linux/static_call_types.h | 28 +++++ tools/objtool/check.c | 130 ++++++++++++++++++++++++ tools/objtool/check.h | 1 + tools/objtool/elf.c | 8 +- tools/objtool/elf.h | 3 +- tools/objtool/objtool.h | 1 + tools/objtool/orc_gen.c | 4 +- tools/objtool/sync-check.sh | 1 + 13 files changed, 193 insertions(+), 9 deletions(-) create mode 100644 tools/include/linux/static_call_types.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 595c06b32b3a..8a48d3eedb84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -216,6 +216,7 @@ config X86 select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL + select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK @@ -231,6 +232,7 @@ config X86 select RTC_MC146818_LIB select SPARSE_IRQ select SRCU + select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE) select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT @@ -452,7 +454,6 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y - select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 07aa8791cbfe..33469ae3612c 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -5,12 +5,23 @@ #include /* + * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which + * uses the current value of the key->func pointer to do an indirect jump to + * the function. This trampoline is only used during boot, before the call + * sites get patched by static_call_update(). The name of this trampoline has + * a magical aspect: objtool uses it to find static call sites so it can create + * the .static_call_sites section. + * * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which * does a direct jump to the function. The direct jump gets patched by * static_call_update(). + * + * Having the trampoline in a special section forces GCC to emit a JMP.d32 when + * it does tail-call optimization on the call; since you cannot compute the + * relative displacement across sections. */ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ - asm(".pushsection .text, \"ax\" \n" \ + asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 0565825970af..5ff2b639a1a6 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -26,6 +26,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func) if (tramp) __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) + __static_call_transform(site, CALL_INSN_OPCODE, func); + mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9a03e5b23135..2568f4cdcbd1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -136,6 +136,7 @@ SECTIONS ENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT + STATIC_CALL_TEXT *(.fixup) *(.gnu.warning) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0088a5cd6a40..0502087654d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -642,6 +642,12 @@ *(.softirqentry.text) \ __softirqentry_text_end = .; +#define STATIC_CALL_TEXT \ + ALIGN_FUNCTION(); \ + __static_call_text_start = .; \ + *(.static_call.text) \ + __static_call_text_end = .; + /* Section used for early init (in .S files) */ #define HEAD_TEXT KEEP(*(.head.text)) diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h new file mode 100644 index 000000000000..408d345d83e1 --- /dev/null +++ b/tools/include/linux/static_call_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include +#include + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + +#endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..f8f7a40c6ef3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -16,6 +16,7 @@ #include #include +#include #define FAKE_JUMP_OFFSET -1 @@ -433,6 +434,103 @@ static int add_dead_ends(struct objtool_file *file) return 0; } +static int create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -1522,6 +1620,23 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +static int read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1601,6 +1716,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + return 0; } @@ -2432,6 +2551,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; + if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + break; case INSN_JUMP_CONDITIONAL: @@ -2791,6 +2915,7 @@ int check(const char *_objname, bool orc) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.static_call_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; @@ -2838,6 +2963,11 @@ int check(const char *_objname, bool orc) warnings += ret; } + ret = create_static_call_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + if (orc) { ret = create_orc(&file); if (ret < 0) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 061aa96e15d3..36d38b9153ac 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head static_call_node; struct section *sec; unsigned long offset; unsigned int len; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3ddbd66f1a37..4e1d7460574b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -652,7 +652,7 @@ struct elf *elf_open_read(const char *name, int flags) } struct section *elf_create_section(struct elf *elf, const char *name, - size_t entsize, int nr) + unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; @@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_entsize = entsize; sec->sh.sh_type = SHT_PROGBITS; sec->sh.sh_addralign = 1; - sec->sh.sh_flags = SHF_ALLOC; + sec->sh.sh_flags = SHF_ALLOC | sh_flags; /* Add section name to .shstrtab (or .strtab for Clang) */ @@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect strcpy(relocname, ".rel"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0); free(relocname); if (!sec) return NULL; @@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0); + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 6cc80a075166..807f8c670097 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -56,6 +56,7 @@ struct symbol { unsigned int len; struct symbol *pfunc, *cfunc, *alias; bool uaccess_safe; + bool static_call_tramp; }; struct reloc { @@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc) } struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); void elf_add_reloc(struct elf *elf, struct reloc *reloc); int elf_write_insn(struct elf *elf, struct section *sec, diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 528028a66816..9a7cd0b88bd8 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -16,6 +16,7 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; bool ignore_unreachables, c_file, hints, rodata; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 968f55e6dd94..e6b2363c2e03 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); if (!sec) return -1; @@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file) return -1; /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", + u_sec = elf_create_section(file->elf, ".orc_unwind", 0, sizeof(struct orc_entry), idx); /* populate sections */ diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 2a1261bfbb62..aa099b21dffa 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk +include/linux/static_call_types.h ' check_2 () { From f03c412915f5f69f2d17bcd20ecdd69320bcbf7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:46 +0200 Subject: [PATCH 11/21] static_call: Add simple self-test for static calls Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135804.922581202@infradead.org --- arch/Kconfig | 6 ++++++ kernel/static_call.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 2c4936ac620a..76ec3395b843 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -106,6 +106,12 @@ config STATIC_KEYS_SELFTEST help Boot time self-test of the branch patching code. +config STATIC_CALL_SELFTEST + bool "Static call selftest" + depends on HAVE_STATIC_CALL + help + Boot time self-test of the call patching code. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/kernel/static_call.c b/kernel/static_call.c index 753b2f1b4fb8..97142cb6bfa6 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -369,3 +369,46 @@ static void __init static_call_init(void) #endif } early_initcall(static_call_init); + +#ifdef CONFIG_STATIC_CALL_SELFTEST + +static int func_a(int x) +{ + return x+1; +} + +static int func_b(int x) +{ + return x+2; +} + +DEFINE_STATIC_CALL(sc_selftest, func_a); + +static struct static_call_data { + int (*func)(int); + int val; + int expect; +} static_call_data [] __initdata = { + { NULL, 2, 3 }, + { func_b, 2, 4 }, + { func_a, 2, 3 } +}; + +static int __init test_static_call_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { + struct static_call_data *scd = &static_call_data[i]; + + if (scd->func) + static_call_update(sc_selftest, scd->func); + + WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); + } + + return 0; +} +early_initcall(test_static_call_init); + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ From c43a43e439e00ad2a4d98716895d961ade6bbbfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:47 +0200 Subject: [PATCH 12/21] x86/alternatives: Teach text_poke_bp() to emulate RET Future patches will need to poke a RET instruction, provide the infrastructure required for this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20200818135804.982214828@infradead.org --- arch/x86/include/asm/text-patching.h | 19 +++++++++++++++++++ arch/x86/kernel/alternative.c | 5 +++++ 2 files changed, 24 insertions(+) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 6593b42cb379..b7421780e4e9 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -53,6 +53,9 @@ extern void text_poke_finish(void); #define INT3_INSN_SIZE 1 #define INT3_INSN_OPCODE 0xCC +#define RET_INSN_SIZE 1 +#define RET_INSN_OPCODE 0xC3 + #define CALL_INSN_SIZE 5 #define CALL_INSN_OPCODE 0xE8 @@ -73,6 +76,7 @@ static __always_inline int text_opcode_size(u8 opcode) switch(opcode) { __CASE(INT3); + __CASE(RET); __CASE(CALL); __CASE(JMP32); __CASE(JMP8); @@ -140,12 +144,27 @@ void int3_emulate_push(struct pt_regs *regs, unsigned long val) *(unsigned long *)regs->sp = val; } +static __always_inline +unsigned long int3_emulate_pop(struct pt_regs *regs) +{ + unsigned long val = *(unsigned long *)regs->sp; + regs->sp += sizeof(unsigned long); + return val; +} + static __always_inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) { int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } + +static __always_inline +void int3_emulate_ret(struct pt_regs *regs) +{ + unsigned long ip = int3_emulate_pop(regs); + int3_emulate_jmp(regs, ip); +} #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cdaab30880b9..4adbe65afe23 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1103,6 +1103,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs) */ goto out_put; + case RET_INSN_OPCODE: + int3_emulate_ret(regs); + break; + case CALL_INSN_OPCODE: int3_emulate_call(regs, (long)ip + tp->rel32); break; @@ -1277,6 +1281,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, switch (tp->opcode) { case INT3_INSN_OPCODE: + case RET_INSN_OPCODE: break; case CALL_INSN_OPCODE: From 452cddbff74b6a15b9354505671011700fe03710 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:48 +0200 Subject: [PATCH 13/21] static_call: Add static_call_cond() Extend the static_call infrastructure to optimize the following common pattern: if (func_ptr) func_ptr(args...) For the trampoline (which is in effect a tail-call), we patch the JMP.d32 into a RET, which then directly consumes the trampoline call. For the in-line sites we replace the CALL with a NOP5. NOTE: this is 'obviously' limited to functions with a 'void' return type. NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed to a full function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org --- arch/x86/include/asm/static_call.h | 12 +++-- arch/x86/kernel/static_call.c | 44 +++++++++++---- include/linux/static_call.h | 86 ++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 33469ae3612c..c37f11999d0c 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -20,15 +20,21 @@ * it does tail-call optimization on the call; since you cannot compute the * relative displacement across sections. */ -#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + +#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ STATIC_CALL_TRAMP_STR(name) ": \n" \ - " .byte 0xe9 # jmp.d32 \n" \ - " .long " #func " - (. + 4) \n" \ + insns " \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 5ff2b639a1a6..ead6726fb06d 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -4,19 +4,41 @@ #include #include -static void __static_call_transform(void *insn, u8 opcode, void *func) +enum insn_type { + CALL = 0, /* site call */ + NOP = 1, /* site cond-call */ + JMP = 2, /* tramp / site tail-call */ + RET = 3, /* tramp / site cond-tail-call */ +}; + +static void __static_call_transform(void *insn, enum insn_type type, void *func) { - const void *code = text_gen_insn(opcode, insn, func); + int size = CALL_INSN_SIZE; + const void *code; - if (WARN_ONCE(*(u8 *)insn != opcode, - "unexpected static call insn opcode 0x%x at %pS\n", - opcode, insn)) + switch (type) { + case CALL: + code = text_gen_insn(CALL_INSN_OPCODE, insn, func); + break; + + case NOP: + code = ideal_nops[NOP_ATOMIC5]; + break; + + case JMP: + code = text_gen_insn(JMP32_INSN_OPCODE, insn, func); + break; + + case RET: + code = text_gen_insn(RET_INSN_OPCODE, insn, func); + size = RET_INSN_SIZE; + break; + } + + if (memcmp(insn, code, size) == 0) return; - if (memcmp(insn, code, CALL_INSN_SIZE) == 0) - return; - - text_poke_bp(insn, code, CALL_INSN_SIZE, NULL); + text_poke_bp(insn, code, size, NULL); } void arch_static_call_transform(void *site, void *tramp, void *func) @@ -24,10 +46,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func) mutex_lock(&text_mutex); if (tramp) - __static_call_transform(tramp, JMP32_INSN_OPCODE, func); + __static_call_transform(tramp, func ? JMP : RET, func); if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) - __static_call_transform(site, CALL_INSN_OPCODE, func); + __static_call_transform(site, func ? CALL : NOP, func); mutex_unlock(&text_mutex); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 6f62ceda7dd9..0f74581e0e2f 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -16,7 +16,9 @@ * * DECLARE_STATIC_CALL(name, func); * DEFINE_STATIC_CALL(name, func); + * DEFINE_STATIC_CALL_NULL(name, typename); * static_call(name)(args...); + * static_call_cond(name)(args...); * static_call_update(name, func); * * Usage example: @@ -52,6 +54,43 @@ * rather than calling through the trampoline. This requires objtool or a * compiler plugin to detect all the static_call() sites and annotate them * in the .static_call_sites section. + * + * + * Notes on NULL function pointers: + * + * Static_call()s support NULL functions, with many of the caveats that + * regular function pointers have. + * + * Clearly calling a NULL function pointer is 'BAD', so too for + * static_call()s (although when HAVE_STATIC_CALL it might not be immediately + * fatal). A NULL static_call can be the result of: + * + * DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int)); + * + * which is equivalent to declaring a NULL function pointer with just a + * typename: + * + * void (*my_func_ptr)(int arg1) = NULL; + * + * or using static_call_update() with a NULL function. In both cases the + * HAVE_STATIC_CALL implementation will patch the trampoline with a RET + * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE + * architectures can patch the trampoline call to a NOP. + * + * In all cases, any argument evaluation is unconditional. Unlike a regular + * conditional function pointer call: + * + * if (my_func_ptr) + * my_func_ptr(arg1) + * + * where the argument evaludation also depends on the pointer value. + * + * When calling a static_call that can be NULL, use: + * + * static_call_cond(name)(arg1); + * + * which will include the required value tests to avoid NULL-pointer + * dereferences. */ #include @@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end); }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ @@ -143,7 +191,15 @@ struct static_call_key { }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) @@ -179,9 +235,39 @@ struct static_call_key { .func = _func, \ } +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + } + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) +static inline void __static_call_nop(void) { } + +/* + * This horrific hack takes care of two things: + * + * - it ensures the compiler will only load the function pointer ONCE, + * which avoids a reload race. + * + * - it ensures the argument evaluation is unconditional, similar + * to the HAVE_STATIC_CALL variant. + * + * Sadly current GCC/Clang (10 for both) do not optimize this properly + * and will emit an indirect call for the NULL case :-( + */ +#define __static_call_cond(name) \ +({ \ + void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \ + if (!func) \ + func = &__static_call_nop; \ + (typeof(STATIC_CALL_TRAMP(name))*)func; \ +}) + +#define static_call_cond(name) (void)__static_call_cond(name) + static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) { From 5b06fd3bb9cdce4f3e731c48eb5b74c4acc47997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:49 +0200 Subject: [PATCH 14/21] static_call: Handle tail-calls GCC can turn our static_call(name)(args...) into a tail call, in which case we get a JMP.d32 into the trampoline (which then does a further tail-call). Teach objtool to recognise and mark these in .static_call_sites and adjust the code patching to deal with this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.101186767@infradead.org --- arch/x86/kernel/static_call.c | 21 ++++++++++++++++++--- include/linux/static_call.h | 4 ++-- include/linux/static_call_types.h | 7 +++++++ kernel/static_call.c | 21 +++++++++++++-------- tools/include/linux/static_call_types.h | 7 +++++++ tools/objtool/check.c | 18 +++++++++++++----- 6 files changed, 60 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ead6726fb06d..60a325c731df 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -41,15 +41,30 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) text_poke_bp(insn, code, size, NULL); } -void arch_static_call_transform(void *site, void *tramp, void *func) +static inline enum insn_type __sc_insn(bool null, bool tail) +{ + /* + * Encode the following table without branches: + * + * tail null insn + * -----+-------+------ + * 0 | 0 | CALL + * 0 | 1 | NOP + * 1 | 0 | JMP + * 1 | 1 | RET + */ + return 2*tail + null; +} + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); if (tramp) - __static_call_transform(tramp, func ? JMP : RET, func); + __static_call_transform(tramp, __sc_insn(!func, true), func); if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) - __static_call_transform(site, func ? CALL : NOP, func); + __static_call_transform(site, __sc_insn(!func, tail), func); mutex_unlock(&text_mutex); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0f74581e0e2f..519bd666e096 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -103,7 +103,7 @@ /* * Either @site or @tramp can be NULL. */ -extern void arch_static_call_transform(void *site, void *tramp, void *func); +extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail); #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) @@ -206,7 +206,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) { cpus_read_lock(); WRITE_ONCE(key->func, func); - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); cpus_read_unlock(); } diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). diff --git a/kernel/static_call.c b/kernel/static_call.c index 97142cb6bfa6..d98e0e4272c1 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -15,8 +15,6 @@ extern struct static_call_site __start_static_call_sites[], static bool static_call_initialized; -#define STATIC_CALL_INIT 1UL - /* mutex to protect key modules/sites */ static DEFINE_MUTEX(static_call_mutex); @@ -39,18 +37,23 @@ static inline void *static_call_addr(struct static_call_site *site) static inline struct static_call_key *static_call_key(const struct static_call_site *site) { return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT); + (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT; + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) - + site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -104,7 +107,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) key->func = func; - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); /* * If uninitialized, we'll not update the callsites, but they still @@ -154,7 +157,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; } - arch_static_call_transform(site_addr, NULL, func); + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); } } @@ -198,7 +202,8 @@ static int __static_call_init(struct module *mod, key->mods = site_mod; } - arch_static_call_transform(site_addr, NULL, key->func); + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); } return 0; diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f8f7a40c6ef3..75d0cd2f9044 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -516,7 +516,7 @@ static int create_static_call_sections(struct objtool_file *file) } memset(reloc, 0, sizeof(*reloc)); reloc->sym = key_sym; - reloc->addend = 0; + reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site) + 4; reloc->sec = reloc_sec; @@ -747,6 +747,10 @@ static int add_jump_destinations(struct objtool_file *file) } else { /* external sibling call */ insn->call_dest = reloc->sym; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } continue; } @@ -798,6 +802,10 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call */ insn->call_dest = insn->jump_dest->func; + if (insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } } } } @@ -1684,6 +1692,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_static_call_tramps(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; @@ -1716,10 +1728,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_static_call_tramps(file); - if (ret) - return ret; - return 0; } From 6c3fce794e9d2a5ce3a948962d0808a459c40a84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:50 +0200 Subject: [PATCH 15/21] static_call: Add some validation Verify the text we're about to change is as we expect it to be. Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135805.161974981@infradead.org --- arch/x86/kernel/static_call.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 60a325c731df..55140d8db106 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -41,6 +41,26 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) text_poke_bp(insn, code, size, NULL); } +static void __static_call_validate(void *insn, bool tail) +{ + u8 opcode = *(u8 *)insn; + + if (tail) { + if (opcode == JMP32_INSN_OPCODE || + opcode == RET_INSN_OPCODE) + return; + } else { + if (opcode == CALL_INSN_OPCODE || + !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5)) + return; + } + + /* + * If we ever trigger this, our text is corrupt, we'll probably not live long. + */ + WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn); +} + static inline enum insn_type __sc_insn(bool null, bool tail) { /* @@ -60,11 +80,15 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { mutex_lock(&text_mutex); - if (tramp) + if (tramp) { + __static_call_validate(tramp, true); __static_call_transform(tramp, __sc_insn(!func, true), func); + } - if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail); __static_call_transform(site, __sc_insn(!func, tail), func); + } mutex_unlock(&text_mutex); } From a945c8345ec0decb2f1a7f19a8c5e60bcb1dd1eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:51 +0200 Subject: [PATCH 16/21] static_call: Allow early init In order to use static_call() to wire up x86_pmu, we need to initialize earlier, specifically before memory allocation works; copy some of the tricks from jump_label to enable this. Primarily we overload key->next to store a sites pointer when there are no modules, this avoids having to use kmalloc() to initialize the sites and allows us to run much earlier. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20200818135805.220737930@infradead.org --- arch/x86/kernel/setup.c | 2 + arch/x86/kernel/static_call.c | 5 ++- include/linux/static_call.h | 15 +++++++- kernel/static_call.c | 70 +++++++++++++++++++++++++++++++++-- 4 files changed, 85 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3511736fbc74..799a6de439ea 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -849,6 +850,7 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); arch_init_ideal_nops(); jump_label_init(); + static_call_init(); early_ioremap_init(); setup_olpc_ofw_pgd(); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 55140d8db106..ca9a380d9c0b 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,7 +11,7 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; -static void __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) { int size = CALL_INSN_SIZE; const void *code; @@ -38,6 +38,9 @@ static void __static_call_transform(void *insn, enum insn_type type, void *func) if (memcmp(insn, code, size) == 0) return; + if (unlikely(system_state == SYSTEM_BOOTING)) + return text_poke_early(insn, code, size); + text_poke_bp(insn, code, size, NULL); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 519bd666e096..bfa2ba39be57 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -136,6 +136,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #ifdef CONFIG_HAVE_STATIC_CALL_INLINE +extern void __init static_call_init(void); + struct static_call_mod { struct static_call_mod *next; struct module *mod; /* for vmlinux, mod == NULL */ @@ -144,7 +146,12 @@ struct static_call_mod { struct static_call_key { void *func; - struct static_call_mod *mods; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; }; extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); @@ -155,7 +162,7 @@ extern int static_call_text_reserved(void *start, void *end); DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ - .mods = NULL, \ + .type = 1, \ }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) @@ -180,6 +187,8 @@ extern int static_call_text_reserved(void *start, void *end); #elif defined(CONFIG_HAVE_STATIC_CALL) +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; @@ -225,6 +234,8 @@ static inline int static_call_text_reserved(void *start, void *end) #else /* Generic implementation */ +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; diff --git a/kernel/static_call.c b/kernel/static_call.c index d98e0e4272c1..f8362b3f8fd5 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -94,10 +94,31 @@ static inline void static_call_sort_entries(struct static_call_site *start, static_call_site_cmp, static_call_site_swap); } +static inline bool static_call_key_has_mods(struct static_call_key *key) +{ + return !(key->type & 1); +} + +static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) +{ + if (!static_call_key_has_mods(key)) + return NULL; + + return key->mods; +} + +static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) +{ + if (static_call_key_has_mods(key)) + return NULL; + + return (struct static_call_site *)(key->type & ~1); +} + void __static_call_update(struct static_call_key *key, void *tramp, void *func) { struct static_call_site *site, *stop; - struct static_call_mod *site_mod; + struct static_call_mod *site_mod, first; cpus_read_lock(); static_call_lock(); @@ -116,13 +137,22 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) if (WARN_ON_ONCE(!static_call_initialized)) goto done; - for (site_mod = key->mods; site_mod; site_mod = site_mod->next) { + first = (struct static_call_mod){ + .next = static_call_key_next(key), + .mod = NULL, + .sites = static_call_key_sites(key), + }; + + for (site_mod = &first; site_mod; site_mod = site_mod->next) { struct module *mod = site_mod->mod; if (!site_mod->sites) { /* * This can happen if the static call key is defined in * a module which doesn't use it. + * + * It also happens in the has_mods case, where the + * 'first' entry has no sites associated with it. */ continue; } @@ -192,16 +222,48 @@ static int __static_call_init(struct module *mod, if (key != prev_key) { prev_key = key; + /* + * For vmlinux (!mod) avoid the allocation by storing + * the sites pointer in the key itself. Also see + * __static_call_update()'s @first. + * + * This allows architectures (eg. x86) to call + * static_call_init() before memory allocation works. + */ + if (!mod) { + key->sites = site; + key->type |= 1; + goto do_transform; + } + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); if (!site_mod) return -ENOMEM; + /* + * When the key has a direct sites pointer, extract + * that into an explicit struct static_call_mod, so we + * can have a list of modules. + */ + if (static_call_key_sites(key)) { + site_mod->mod = NULL; + site_mod->next = NULL; + site_mod->sites = static_call_key_sites(key); + + key->mods = site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + } + site_mod->mod = mod; site_mod->sites = site; - site_mod->next = key->mods; + site_mod->next = static_call_key_next(key); key->mods = site_mod; } +do_transform: arch_static_call_transform(site_addr, NULL, key->func, static_call_is_tail(site)); } @@ -348,7 +410,7 @@ int static_call_text_reserved(void *start, void *end) return __static_call_mod_text_reserved(start, end); } -static void __init static_call_init(void) +void __init static_call_init(void) { int ret; From d25e37d89dd2f41d7acae0429039d2f0ae8b4a07 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 18 Aug 2020 15:57:52 +0200 Subject: [PATCH 17/21] tracepoint: Optimize using static_call() Currently the tracepoint site will iterate a vector and issue indirect calls to however many handlers are registered (ie. the vector is long). Using static_call() it is possible to optimize this for the common case of only having a single handler registered. In this case the static_call() can directly call this handler. Otherwise, if the vector is longer than 1, call a function that iterates the whole vector like the current code. [peterz: updated to new interface] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.279421092@infradead.org --- include/linux/tracepoint-defs.h | 5 ++ include/linux/tracepoint.h | 86 +++++++++++++++++++++++---------- include/trace/define_trace.h | 14 +++--- kernel/tracepoint.c | 25 ++++++++-- 4 files changed, 94 insertions(+), 36 deletions(-) diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index b29950a19205..de97450cf190 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -11,6 +11,8 @@ #include #include +struct static_call_key; + struct trace_print_flags { unsigned long mask; const char *name; @@ -30,6 +32,9 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ struct static_key key; + struct static_call_key *static_call_key; + void *static_call_tramp; + void *iterator; int (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 598fec9f9dbf..3722a10fc46d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -19,6 +19,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -92,7 +93,9 @@ extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ +#ifndef PARAMS #define PARAMS(args...) args +#endif #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) @@ -148,6 +151,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef TRACEPOINTS_ENABLED +#ifdef CONFIG_HAVE_STATIC_CALL +#define __DO_TRACE_CALL(name) static_call(tp_func_##name) +#else +#define __DO_TRACE_CALL(name) __tracepoint_iter_##name +#endif /* CONFIG_HAVE_STATIC_CALL */ + /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. @@ -157,12 +166,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * has a "void" prototype, then it is invalid to declare a function * as "(void *, void)". */ -#define __DO_TRACE(tp, proto, args, cond, rcuidle) \ +#define __DO_TRACE(name, proto, args, cond, rcuidle) \ do { \ struct tracepoint_func *it_func_ptr; \ - void *it_func; \ - void *__data; \ int __maybe_unused __idx = 0; \ + void *__data; \ \ if (!(cond)) \ return; \ @@ -182,14 +190,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_irq_enter_irqson(); \ } \ \ - it_func_ptr = rcu_dereference_raw((tp)->funcs); \ - \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ - do { \ - it_func = (it_func_ptr)->func; \ - __data = (it_func_ptr)->data; \ - ((void(*)(proto))(it_func))(args); \ - } while ((++it_func_ptr)->func); \ + __data = (it_func_ptr)->data; \ + __DO_TRACE_CALL(name)(args); \ } \ \ if (rcuidle) { \ @@ -205,7 +210,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline void trace_##name##_rcuidle(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ + __DO_TRACE(name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), 1); \ @@ -227,11 +232,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ + extern int __tracepoint_iter_##name(data_proto); \ + DECLARE_STATIC_CALL(tp_func_##name, __tracepoint_iter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ + __DO_TRACE(name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), 0); \ @@ -277,21 +284,50 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __section(__tracepoints_strings) = #name; \ - struct tracepoint __tracepoint_##name __used \ - __section(__tracepoints) = \ - { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ - __TRACEPOINT_ENTRY(name); +#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ + static const char __tpstrtab_##_name[] \ + __section(__tracepoints_strings) = #_name; \ + extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ + int __tracepoint_iter_##_name(void *__data, proto); \ + struct tracepoint __tracepoint_##_name __used \ + __section(__tracepoints) = { \ + .name = __tpstrtab_##_name, \ + .key = STATIC_KEY_INIT_FALSE, \ + .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ + .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ + .iterator = &__tracepoint_iter_##_name, \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + .funcs = NULL }; \ + __TRACEPOINT_ENTRY(_name); \ + int __tracepoint_iter_##_name(void *__data, proto) \ + { \ + struct tracepoint_func *it_func_ptr; \ + void *it_func; \ + \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ + do { \ + it_func = (it_func_ptr)->func; \ + __data = (it_func_ptr)->data; \ + ((void(*)(void *, proto))(it_func))(__data, args); \ + } while ((++it_func_ptr)->func); \ + return 0; \ + } \ + DEFINE_STATIC_CALL(tp_func_##_name, __tracepoint_iter_##_name); -#define DEFINE_TRACE(name) \ - DEFINE_TRACE_FN(name, NULL, NULL); +#define DEFINE_TRACE(name, proto, args) \ + DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ - EXPORT_SYMBOL_GPL(__tracepoint_##name) + EXPORT_SYMBOL_GPL(__tracepoint_##name); \ + EXPORT_SYMBOL_GPL(__tracepoint_iter_##name); \ + EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ - EXPORT_SYMBOL(__tracepoint_##name) + EXPORT_SYMBOL(__tracepoint_##name); \ + EXPORT_SYMBOL(__tracepoint_iter_##name); \ + EXPORT_STATIC_CALL(tp_func_##name) + #else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ @@ -320,8 +356,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } -#define DEFINE_TRACE_FN(name, reg, unreg) -#define DEFINE_TRACE(name) +#define DEFINE_TRACE_FN(name, reg, unreg, proto, args) +#define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index bd75f97867b9..00723935dcc7 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -25,7 +25,7 @@ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - DEFINE_TRACE(name) + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_CONDITION #define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ @@ -39,12 +39,12 @@ #undef TRACE_EVENT_FN #define TRACE_EVENT_FN(name, proto, args, tstruct, \ assign, print, reg, unreg) \ - DEFINE_TRACE_FN(name, reg, unreg) + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_FN_COND #define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \ assign, print, reg, unreg) \ - DEFINE_TRACE_FN(name, reg, unreg) + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_NOP #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) @@ -54,15 +54,15 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ - DEFINE_TRACE(name) + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_FN #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ - DEFINE_TRACE_FN(name, reg, unreg) + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ - DEFINE_TRACE(name) + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_CONDITION #define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ @@ -70,7 +70,7 @@ #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ - DEFINE_TRACE(name) + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef TRACE_INCLUDE #undef __TRACE_INCLUDE diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 8e05ed2cd39e..e92f3fb8887a 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -221,6 +221,20 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) +{ + void *func = tp->iterator; + + /* Synthetic events do not have static call sites */ + if (!tp->static_call_key) + return; + + if (!tp_funcs[1].func) + func = tp_funcs[0].func; + + __static_call_update(tp->static_call_key, tp->static_call_tramp, func); +} + /* * Add the probe function to a tracepoint. */ @@ -251,8 +265,9 @@ static int tracepoint_add_func(struct tracepoint *tp, * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); - if (!static_key_enabled(&tp->key)) - static_key_slow_inc(&tp->key); + tracepoint_update_call(tp, tp_funcs); + static_key_enable(&tp->key); + release_probes(old); return 0; } @@ -281,9 +296,11 @@ static int tracepoint_remove_func(struct tracepoint *tp, if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); - if (static_key_enabled(&tp->key)) - static_key_slow_dec(&tp->key); + static_key_disable(&tp->key); + } else { + tracepoint_update_call(tp, tp_funcs); } + rcu_assign_pointer(tp->funcs, tp_funcs); release_probes(old); return 0; From 7c9903c9bf716d89b34f96cc2ed64e28dabf570b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:53 +0200 Subject: [PATCH 18/21] x86/perf, static_call: Optimize x86_pmu methods Replace many of the indirect calls with static_call(). The average PMI time, as measured by perf_sample_event_took()*: PRE: 3283.03 [ns] POST: 3145.12 [ns] Which is a ~138 [ns] win per PMI, or a ~4.2% decrease. [*] on an IVB-EP, using: 'perf record -a -e cycles -- make O=defconfig-build/ -j80' Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.338001015@infradead.org --- arch/x86/events/core.c | 134 +++++++++++++++++++++++++++++------------ 1 file changed, 94 insertions(+), 40 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1cbf57dc2ac8..360c395d51d0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,34 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); +/* + * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined + * from just a typename, as opposed to an actual function. + */ +DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable); + +DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); +DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); +DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); + +DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events); +DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints); +DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints); + +DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); +DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); + +DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); +DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); + +DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); + u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -660,7 +689,7 @@ static void x86_pmu_disable(struct pmu *pmu) cpuc->enabled = 0; barrier(); - x86_pmu.disable_all(); + static_call(x86_pmu_disable_all)(); } void x86_pmu_enable_all(int added) @@ -907,8 +936,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) n0 -= cpuc->n_txn; - if (x86_pmu.start_scheduling) - x86_pmu.start_scheduling(cpuc); + static_call_cond(x86_pmu_start_scheduling)(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = cpuc->event_constraint[i]; @@ -925,7 +953,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * change due to external factors (sibling state, allow_tfa). */ if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { - c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); + c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]); cpuc->event_constraint[i] = c; } @@ -1008,8 +1036,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!unsched && assign) { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; - if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, i, assign[i]); + static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]); } } else { for (i = n0; i < n; i++) { @@ -1018,15 +1045,13 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* * release events that failed scheduling */ - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, e); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, e); cpuc->event_constraint[i] = NULL; } } - if (x86_pmu.stop_scheduling) - x86_pmu.stop_scheduling(cpuc); + static_call_cond(x86_pmu_stop_scheduling)(cpuc); return unsched ? -EINVAL : 0; } @@ -1226,7 +1251,7 @@ static void x86_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - x86_pmu.enable_all(added); + static_call(x86_pmu_enable_all)(added); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -1347,7 +1372,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) goto out; /* @@ -1365,13 +1390,11 @@ static int x86_pmu_add(struct perf_event *event, int flags) cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - if (x86_pmu.add) { - /* - * This is before x86_pmu_enable() will call x86_pmu_start(), - * so we enable LBRs before an event needs them etc.. - */ - x86_pmu.add(event); - } + /* + * This is before x86_pmu_enable() will call x86_pmu_start(), + * so we enable LBRs before an event needs them etc.. + */ + static_call_cond(x86_pmu_add)(event); ret = 0; out: @@ -1399,7 +1422,7 @@ static void x86_pmu_start(struct perf_event *event, int flags) cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); __set_bit(idx, cpuc->running); - x86_pmu.enable(event); + static_call(x86_pmu_enable)(event); perf_event_update_userpage(event); } @@ -1469,7 +1492,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; if (test_bit(hwc->idx, cpuc->active_mask)) { - x86_pmu.disable(event); + static_call(x86_pmu_disable)(event); __clear_bit(hwc->idx, cpuc->active_mask); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); @@ -1519,8 +1542,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (i >= cpuc->n_events - cpuc->n_added) --cpuc->n_added; - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + static_call_cond(x86_pmu_put_event_constraints)(cpuc, event); /* Delete the array entry. */ while (++i < cpuc->n_events) { @@ -1533,13 +1555,12 @@ static void x86_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); do_del: - if (x86_pmu.del) { - /* - * This is after x86_pmu_stop(); so we disable LBRs after any - * event can need them etc.. - */ - x86_pmu.del(event); - } + + /* + * This is after x86_pmu_stop(); so we disable LBRs after any + * event can need them etc.. + */ + static_call_cond(x86_pmu_del)(event); } int x86_pmu_handle_irq(struct pt_regs *regs) @@ -1617,7 +1638,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; start_clock = sched_clock(); - ret = x86_pmu.handle_irq(regs); + ret = static_call(x86_pmu_handle_irq)(regs); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -1830,6 +1851,38 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) static struct attribute_group x86_pmu_attr_group; static struct attribute_group x86_pmu_caps_group; +static void x86_pmu_static_call_update(void) +{ + static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq); + static_call_update(x86_pmu_disable_all, x86_pmu.disable_all); + static_call_update(x86_pmu_enable_all, x86_pmu.enable_all); + static_call_update(x86_pmu_enable, x86_pmu.enable); + static_call_update(x86_pmu_disable, x86_pmu.disable); + + static_call_update(x86_pmu_add, x86_pmu.add); + static_call_update(x86_pmu_del, x86_pmu.del); + static_call_update(x86_pmu_read, x86_pmu.read); + + static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events); + static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints); + static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints); + + static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling); + static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling); + static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); + + static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); + static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); + + static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); + static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); +} + +static void _x86_pmu_read(struct perf_event *event) +{ + x86_perf_event_update(event); +} + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1898,6 +1951,11 @@ static int __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + if (!x86_pmu.read) + x86_pmu.read = _x86_pmu_read; + + x86_pmu_static_call_update(); + /* * Install callbacks. Core will call them for each online * cpu. @@ -1934,11 +1992,9 @@ static int __init init_hw_perf_events(void) } early_initcall(init_hw_perf_events); -static inline void x86_pmu_read(struct perf_event *event) +static void x86_pmu_read(struct perf_event *event) { - if (x86_pmu.read) - return x86_pmu.read(event); - x86_perf_event_update(event); + static_call(x86_pmu_read)(event); } /* @@ -2015,7 +2071,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) if (!x86_pmu_initialized()) return -EAGAIN; - ret = x86_pmu.schedule_events(cpuc, n, assign); + ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign); if (ret) return ret; @@ -2308,15 +2364,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (x86_pmu.sched_task) - x86_pmu.sched_task(ctx, sched_in); + static_call_cond(x86_pmu_sched_task)(ctx, sched_in); } static void x86_pmu_swap_task_ctx(struct perf_event_context *prev, struct perf_event_context *next) { - if (x86_pmu.swap_task_ctx) - x86_pmu.swap_task_ctx(prev, next); + static_call_cond(x86_pmu_swap_task_ctx)(prev, next); } void perf_check_microcode(void) From de394e7568ce2cdb4643ed230169f484f25f9442 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Tue, 8 Sep 2020 12:57:43 +0200 Subject: [PATCH 19/21] tracepoint: Fix overly long tracepoint names Stephen Rothwell reported: > Exported symbols need to be <= (64 - sizeof(Elf_Addr)) long. This is > presumably 56 on 64 bit arches and the above symbol (including the '.') > is 56 characters long. Shorten the tracepoint symbol name. Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()") Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200908105743.GW2674@hirez.programming.kicks-ass.net --- include/linux/tracepoint.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3722a10fc46d..81fa0b2f271e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -154,7 +154,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef CONFIG_HAVE_STATIC_CALL #define __DO_TRACE_CALL(name) static_call(tp_func_##name) #else -#define __DO_TRACE_CALL(name) __tracepoint_iter_##name +#define __DO_TRACE_CALL(name) __traceiter_##name #endif /* CONFIG_HAVE_STATIC_CALL */ /* @@ -232,8 +232,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ - extern int __tracepoint_iter_##name(data_proto); \ - DECLARE_STATIC_CALL(tp_func_##name, __tracepoint_iter_##name); \ + extern int __traceiter_##name(data_proto); \ + DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -288,19 +288,19 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static const char __tpstrtab_##_name[] \ __section(__tracepoints_strings) = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ - int __tracepoint_iter_##_name(void *__data, proto); \ + int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ __section(__tracepoints) = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ - .iterator = &__tracepoint_iter_##_name, \ + .iterator = &__traceiter_##_name, \ .regfunc = _reg, \ .unregfunc = _unreg, \ .funcs = NULL }; \ __TRACEPOINT_ENTRY(_name); \ - int __tracepoint_iter_##_name(void *__data, proto) \ + int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ @@ -314,18 +314,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } while ((++it_func_ptr)->func); \ return 0; \ } \ - DEFINE_STATIC_CALL(tp_func_##_name, __tracepoint_iter_##_name); + DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); #define DEFINE_TRACE(name, proto, args) \ DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ - EXPORT_SYMBOL_GPL(__tracepoint_iter_##name); \ + EXPORT_SYMBOL_GPL(__traceiter_##name); \ EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name); \ - EXPORT_SYMBOL(__tracepoint_iter_##name); \ + EXPORT_SYMBOL(__traceiter_##name); \ EXPORT_STATIC_CALL(tp_func_##name) From 547305a64632813286700cb6d768bfe773df7d19 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 1 Oct 2020 21:27:57 -0400 Subject: [PATCH 20/21] tracepoint: Fix out of sync data passing by static caller Naresh reported a bug that appears to be a side effect of the static calls. It happens when going from more than one tracepoint callback to a single one, and removing the first callback on the list. The list of tracepoint callbacks holds data and a function to call with the parameters of that tracepoint and a handler to the associated data. old_list: 0: func = foo; data = NULL; 1: func = bar; data = &bar_struct; new_list: 0: func = bar; data = &bar_struct; CPU 0 CPU 1 ----- ----- tp_funcs = old_list; tp_static_caller = tp_interator __DO_TRACE() data = tp_funcs[0].data = NULL; tp_funcs = new_list; tracepoint_update_call() tp_static_caller = tp_funcs[0] = bar; tp_static_caller(data) bar(data) x = data->item = NULL->item BOOM! To solve this, add a tracepoint_synchronize_unregister() between changing tp_funcs and updating the static tracepoint, that does both a synchronize_rcu() and synchronize_srcu(). This will ensure that when the static call is updated to the single callback that it will be receiving the data that it registered with. Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()") Reported-by: Naresh Kamboju Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/linux-next/CA+G9fYvPXVRO0NV7yL=FxCmFEMYkCwdz7R=9W+_votpT824YJA@mail.gmail.com --- kernel/tracepoint.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e92f3fb8887a..26efd22f0633 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -221,7 +221,7 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } -static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync) { void *func = tp->iterator; @@ -229,8 +229,17 @@ static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func if (!tp->static_call_key) return; - if (!tp_funcs[1].func) + if (!tp_funcs[1].func) { func = tp_funcs[0].func; + /* + * If going from the iterator back to a single caller, + * we need to synchronize with __DO_TRACE to make sure + * that the data passed to the callback is the one that + * belongs to that callback. + */ + if (sync) + tracepoint_synchronize_unregister(); + } __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } @@ -265,7 +274,7 @@ static int tracepoint_add_func(struct tracepoint *tp, * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); - tracepoint_update_call(tp, tp_funcs); + tracepoint_update_call(tp, tp_funcs, false); static_key_enable(&tp->key); release_probes(old); @@ -297,11 +306,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp->unregfunc(); static_key_disable(&tp->key); + rcu_assign_pointer(tp->funcs, tp_funcs); } else { - tracepoint_update_call(tp, tp_funcs); + rcu_assign_pointer(tp->funcs, tp_funcs); + tracepoint_update_call(tp, tp_funcs, + tp_funcs[0].func != old[0].func); } - - rcu_assign_pointer(tp->funcs, tp_funcs); release_probes(old); return 0; } From 69e0ad37c9f32d5aa1beb02aab4ec0cd055be013 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 28 Sep 2020 16:09:39 -0700 Subject: [PATCH 21/21] static_call: Fix return type of static_call_init Functions that are passed to early_initcall should be of type initcall_t, which expects a return type of int. This is not currently an error but a patch in the Clang LTO series could change that in the future. Fixes: 9183c3f9ed71 ("static_call: Add inline static call infrastructure") Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/lkml/20200903203053.3411268-17-samitolvanen@google.com/ --- include/linux/static_call.h | 6 +++--- kernel/static_call.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index bfa2ba39be57..695da4c9b338 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -136,7 +136,7 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #ifdef CONFIG_HAVE_STATIC_CALL_INLINE -extern void __init static_call_init(void); +extern int __init static_call_init(void); struct static_call_mod { struct static_call_mod *next; @@ -187,7 +187,7 @@ extern int static_call_text_reserved(void *start, void *end); #elif defined(CONFIG_HAVE_STATIC_CALL) -static inline void static_call_init(void) { } +static inline int static_call_init(void) { return 0; } struct static_call_key { void *func; @@ -234,7 +234,7 @@ static inline int static_call_text_reserved(void *start, void *end) #else /* Generic implementation */ -static inline void static_call_init(void) { } +static inline int static_call_init(void) { return 0; } struct static_call_key { void *func; diff --git a/kernel/static_call.c b/kernel/static_call.c index f8362b3f8fd5..84565c2a41b8 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -410,12 +410,12 @@ int static_call_text_reserved(void *start, void *end) return __static_call_mod_text_reserved(start, end); } -void __init static_call_init(void) +int __init static_call_init(void) { int ret; if (static_call_initialized) - return; + return 0; cpus_read_lock(); static_call_lock(); @@ -434,6 +434,7 @@ void __init static_call_init(void) #ifdef CONFIG_MODULES register_module_notifier(&static_call_module_nb); #endif + return 0; } early_initcall(static_call_init);