mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-18 00:24:39 +00:00
4bbd934556
kretprobe is using freelist to manage return-instances, but freelist, as LIFO queue based on singly linked list, scales badly and reduces the overall throughput of kretprobed routines, especially for high contention scenarios. Here's a typical throughput test of sys_prctl (counts in 10 seconds, measured with perf stat -a -I 10000 -e syscalls:sys_enter_prctl): OS: Debian 10 X86_64, Linux 6.5rc7 with freelist HW: XEON 8336C x 2, 64 cores/128 threads, DDR4 3200MT/s 1T 2T 4T 8T 16T 24T 24150045 29317964 15446741 12494489 18287272 17708768 32T 48T 64T 72T 96T 128T 16200682 13737658 11645677 11269858 10470118 9931051 This patch introduces objpool to replace freelist. objpool is a high performance queue, which can bring near-linear scalability to kretprobed routines. Tests of kretprobe throughput show the biggest ratio as 159x of original freelist. Here's the result: 1T 2T 4T 8T 16T native: 41186213 82336866 164250978 328662645 658810299 freelist: 24150045 29317964 15446741 12494489 18287272 objpool: 23926730 48010314 96125218 191782984 385091769 32T 48T 64T 96T 128T native: 1330338351 1969957941 2512291791 2615754135 2671040914 freelist: 16200682 13737658 11645677 10470118 9931051 objpool: 764481096 1147149781 1456220214 1502109662 1579015050 Testings on 96-core ARM64 output similarly, but with the biggest ratio up to 448x: OS: Debian 10 AARCH64, Linux 6.5rc7 HW: Kunpeng-920 96 cores/2 sockets/4 NUMA nodes, DDR4 2933 MT/s 1T 2T 4T 8T 16T native: . 
30066096 63569843 126194076 257447289 505800181 freelist: 16152090 11064397 11124068 7215768 5663013 objpool: 13997541 28032100 55726624 110099926 221498787 24T 32T 48T 64T 96T native: 763305277 1015925192 1521075123 2033009392 3021013752 freelist: 5015810 4602893 3766792 3382478 2945292 objpool: 328192025 439439564 668534502 887401381 1319972072 Link: https://lore.kernel.org/all/20231017135654.82270-4-wuqiang.matt@bytedance.com/ Signed-off-by: wuqiang.matt <wuqiang.matt@bytedance.com> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
93 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Return hooking with list-based shadow stack.
|
|
*/
|
|
#ifndef _LINUX_RETHOOK_H
|
|
#define _LINUX_RETHOOK_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <linux/objpool.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/llist.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
struct rethook_node;
|
|
|
|
typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, struct pt_regs *);
|
|
|
|
/**
|
|
* struct rethook - The rethook management data structure.
|
|
* @data: The user-defined data storage.
|
|
* @handler: The user-defined return hook handler.
|
|
* @pool: The pool of struct rethook_node.
|
|
* @ref: The reference counter.
|
|
* @rcu: The rcu_head for deferred freeing.
|
|
*
|
|
* Don't embed to another data structure, because this is a self-destructive
|
|
* data structure when all rethook_node are freed.
|
|
*/
|
|
struct rethook {
|
|
void *data;
|
|
rethook_handler_t handler;
|
|
struct objpool_head pool;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
/**
 * struct rethook_node - The rethook shadow-stack entry node.
 * @rcu: The rcu_head for deferred freeing.
 * @llist: The llist, linked to a struct task_struct::rethooks.
 * @rethook: The pointer to the struct rethook.
 * @ret_addr: The storage for the real return address.
 * @frame: The storage for the frame pointer.
 *
 * You can embed this to your extended data structure to store any data
 * on each entry of the shadow stack.
 */
struct rethook_node {
	struct rcu_head rcu;
	struct llist_node llist;
	/* Back-pointer to the owning rethook (holds the handler and pool). */
	struct rethook *rethook;
	/* The original return address that the trampoline will restore. */
	unsigned long ret_addr;
	/* Frame marker; passed to rethook_find_ret_addr() when unwinding. */
	unsigned long frame;
};
|
|
|
|
struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num);
|
|
void rethook_stop(struct rethook *rh);
|
|
void rethook_free(struct rethook *rh);
|
|
struct rethook_node *rethook_try_get(struct rethook *rh);
|
|
void rethook_recycle(struct rethook_node *node);
|
|
void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount);
|
|
unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame,
|
|
struct llist_node **cur);
|
|
|
|
/* Arch dependent code must implement arch_* and trampoline code */
|
|
void arch_rethook_prepare(struct rethook_node *node, struct pt_regs *regs, bool mcount);
|
|
void arch_rethook_trampoline(void);
|
|
|
|
/**
|
|
* is_rethook_trampoline() - Check whether the address is rethook trampoline
|
|
* @addr: The address to be checked
|
|
*
|
|
* Return true if the @addr is the rethook trampoline address.
|
|
*/
|
|
static inline bool is_rethook_trampoline(unsigned long addr)
|
|
{
|
|
return addr == (unsigned long)dereference_symbol_descriptor(arch_rethook_trampoline);
|
|
}
|
|
|
|
/* If the architecture needs to fixup the return address, implement it. */
|
|
void arch_rethook_fixup_return(struct pt_regs *regs,
|
|
unsigned long correct_ret_addr);
|
|
|
|
/* Generic trampoline handler, arch code must prepare asm stub */
|
|
unsigned long rethook_trampoline_handler(struct pt_regs *regs,
|
|
unsigned long frame);
|
|
|
|
#ifdef CONFIG_RETHOOK
/* Flush @tk's pending rethook shadow-stack entries (task_struct::rethooks). */
void rethook_flush_task(struct task_struct *tk);
#else
/* Without CONFIG_RETHOOK there is nothing to flush; compile to a no-op. */
#define rethook_flush_task(tsk) do { } while (0)
#endif
|
|
|
|
#endif
|