Reduce ftrace overhead to 280ns

This commit is contained in:
Justine Tunney 2022-05-20 04:46:42 -07:00
parent 4245da19e2
commit c8a2f04058
2 changed files with 33 additions and 17 deletions

View file

@@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#define ShouldUseMsabiAttribute() 1
#include "libc/bits/bits.h"
#include "libc/bits/likely.h"
#include "libc/bits/safemacros.internal.h"
#include "libc/bits/weaken.h"
@@ -707,9 +708,14 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt, va_list va,
if (p < e) *p = hash;
++p;
}
for (; cols > i; --cols) {
if (p < e) {
while (cols > i) {
if (p + 8 < e && cols - i > 8) {
WRITE64LE(p, 0x2020202020202020);
cols -= 8;
p += 8;
} else if (p < e) {
*p++ = ' ';
--cols;
} else {
p = kadvance(p, e, cols - i);
break;

View file

@@ -18,10 +18,12 @@
*/
#include "libc/calls/calls.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/lockcmpxchgp.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/stackframe.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/symbols.internal.h"
@@ -69,21 +71,29 @@ static privileged inline void ReleaseFtraceLock(void) {
static privileged inline bool AcquireFtraceLock(void) {
int me, owner, tries;
for (tries = 0, me = gettid();;) {
owner = 0;
if (_lockcmpxchgp(&ftrace_lock, &owner, me)) {
return true;
}
if (owner == me) {
// we ignore re-entry into ftrace. while the code and build config
// is written to make re-entry highly unlikely, it's impossible to
// guarantee. there's also the possibility of asynchronous signals
return false;
}
if (++tries & 7) {
__builtin_ia32_pause();
} else {
sched_yield();
if (!__threaded) {
return _cmpxchg(&ftrace_lock, 0, -1);
} else {
for (tries = 0, me = gettid();;) {
owner = 0;
if (_lockcmpxchgp(&ftrace_lock, &owner, me)) {
return true;
}
if (owner == -1) {
// avoid things getting weird after first clone() call transition
return false;
}
if (owner == me) {
// we ignore re-entry into ftrace. while the code and build config
// is written to make re-entry highly unlikely, it's impossible to
// guarantee. there's also the possibility of asynchronous signals
return false;
}
if (++tries & 7) {
__builtin_ia32_pause();
} else {
sched_yield();
}
}
}
}