Add SNI support to redbean and improve SSL perf

This change makes SSL virtual hosting possible. You can now load
multiple certificates for multiple domains and redbean will just
figure out which one to use, even if you only have 1 ip address.
You can also use a jumbo certificate that lists all your domains
in the subject alternative names.

This change also makes performance improvements to MbedTLS. Here
are some benchmarks vs. cc1920749e

                                   BEFORE    AFTER   (microsecs)
suite_ssl.com                     2512881   191738 13.11x faster
suite_pkparse.com                   36291     3295 11.01x faster
suite_x509parse.com                854669   120293  7.10x faster
suite_pkwrite.com                    6549     1265  5.18x faster
suite_ecdsa.com                     53347    18778  2.84x faster
suite_pk.com                        49051    18717  2.62x faster
suite_ecdh.com                      19535     9502  2.06x faster
suite_shax.com                      15848     7965  1.99x faster
suite_rsa.com                      353257   184828  1.91x faster
suite_x509write.com                162646    85733  1.90x faster
suite_ecp.com                       20503    11050  1.86x faster
suite_hmac_drbg.no_reseed.com       19528    11417  1.71x faster
suite_hmac_drbg.nopr.com            12460     8010  1.56x faster
suite_mpi.com                      687124   442661  1.55x faster
suite_hmac_drbg.pr.com              11890     7752  1.53x faster

There aren't any special tricks to the performance improvements.
It's mostly due to code cleanup, assembly and intel instructions
like mulx, adox, and adcx.
This commit is contained in:
Justine Tunney 2021-07-19 14:55:20 -07:00
parent f3e28aa192
commit 398f0c16fb
190 changed files with 14367 additions and 8928 deletions

View file

@ -53,6 +53,7 @@ char *dirname(char *);
char *basename(const char *) nosideeffect;
char *basename_n(const char *, size_t) nosideeffect;
bool isabspath(const char *) paramsnonnull() nosideeffect;
char *stripexts(char *);
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § conversion » computation

View file

@ -191,7 +191,12 @@ hidden int __fmt(void *fn, void *arg, const char *format, va_list va) {
} else if (format[1] == '.' && format[2] == '*' && format[3] == 's') {
n = va_arg(va, unsigned); /* FAST PATH: PRECISION STRING */
s = va_arg(va, const char *);
if (!s) s = "(null)", n = MIN(6, n);
if (s) {
n = strnlen(s, n);
} else {
s = "(null)";
n = MIN(6, n);
}
if (out(s, arg, n) == -1) return -1;
format += 4;
continue;
@ -418,10 +423,12 @@ hidden int __fmt(void *fn, void *arg, const char *format, va_list va) {
if (flags & FLAGS_ZEROPAD) {
if (sign) PUT(sign);
sign = 0;
do PUT('0');
do
PUT('0');
while (--width > 0);
} else {
do PUT(' ');
do
PUT(' ');
while (--width > 0);
}
}
@ -523,10 +530,12 @@ hidden int __fmt(void *fn, void *arg, const char *format, va_list va) {
if (flags & FLAGS_ZEROPAD) {
if (sign) PUT(sign);
sign = 0;
do PUT('0');
do
PUT('0');
while (--width > 0);
} else {
do PUT(' ');
do
PUT(' ');
while (--width > 0);
}
}
@ -673,10 +682,12 @@ hidden int __fmt(void *fn, void *arg, const char *format, va_list va) {
PUT(sign);
sign = 0;
}
do PUT('0');
do
PUT('0');
while (--width > 0);
} else {
do PUT(' ');
do
PUT(' ');
while (--width > 0);
}
}

42
libc/fmt/stripexts.c Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/fmt.h"
#include "libc/str/str.h"
/**
 * Strips all file extensions from the last path component, in place.
 *
 * The string is scanned backwards from its end. Every `.` encountered
 * is overwritten with a NUL byte, so `foo.tar.gz` becomes `foo`. The
 * scan stops at the first `/` seen, which means dots in directory
 * names are left alone.
 *
 * @param s is mutated
 * @return s
 */
char *stripexts(char *s) {
  char *p = s + strlen(s);
  while (p > s) {
    --p;
    if (*p == '/') {
      break; /* reached the directory part; leave it untouched */
    }
    if (*p == '.') {
      *p = '\0'; /* truncate here, then keep looking for earlier dots */
    }
  }
  return s;
}

View file

@ -660,6 +660,10 @@ static void *__asan_realloc(void *p, size_t n) {
return q;
}
/**
 * Stub for in-place reallocation.
 *
 * Both arguments are ignored and a null pointer is always returned,
 * i.e. an in-place resize is never performed by this hook.
 */
static void *__asan_realloc_in_place(void *p, size_t n) {
  (void)p;
  (void)n;
  return (void *)0;
}
/**
 * Allocates n bytes by delegating to __asan_memalign() with PAGESIZE
 * as the alignment argument.
 */
static void *__asan_valloc(size_t n) {
  void *mem = __asan_memalign(PAGESIZE, n);
  return mem;
}
@ -752,6 +756,7 @@ void __asan_install_malloc_hooks(void) {
HOOK(hook_realloc, __asan_realloc);
HOOK(hook_memalign, __asan_memalign);
HOOK(hook_malloc_trim, __asan_malloc_trim);
HOOK(hook_realloc_in_place, __asan_realloc_in_place);
HOOK(hook_malloc_usable_size, __asan_malloc_usable_size);
}
@ -847,7 +852,8 @@ textstartup void __asan_init(int argc, char **argv, char **envp,
REQUIRE(FindMemoryInterval);
REQUIRE(TrackMemoryInterval);
if (weaken(hook_malloc) || weaken(hook_calloc) || weaken(hook_realloc) ||
weaken(hook_pvalloc) || weaken(hook_valloc) || weaken(hook_free) ||
weaken(hook_realloc_in_place) || weaken(hook_pvalloc) ||
weaken(hook_valloc) || weaken(hook_free) ||
weaken(hook_malloc_usable_size)) {
REQUIRE(dlmemalign);
REQUIRE(dlmalloc_usable_size);

View file

@ -48,7 +48,7 @@ static int PrintBacktraceUsingAddr2line(int fd, const struct StackFrame *bp) {
struct Garbages *garbage;
sigset_t chldmask, savemask;
const struct StackFrame *frame;
const char *debugbin, *p1, *p2, *p3, *addr2line;
char *debugbin, *p1, *p2, *p3, *addr2line;
char buf[kBacktraceBufSize], *argv[kBacktraceMaxFrames];
if (IsOpenbsd()) return -1;
if (IsWindows()) return -1;
@ -90,14 +90,44 @@ static int PrintBacktraceUsingAddr2line(int fd, const struct StackFrame *bp) {
}
close(pipefds[1]);
while ((got = read(pipefds[0], buf, kBacktraceBufSize)) > 0) {
for (p1 = buf; got;) {
/*
* remove racist output from gnu tooling, that can't be disabled
* otherwise, since it breaks other tools like emacs that aren't
* equipped to ignore it, and what's most problematic is that
* addr2line somehow manages to put the racism onto the one line
* in the backtrace we actually care about.
*/
p1 = buf;
p3 = p1 + got;
/*
* Remove deep libc error reporting facilities from backtraces.
*
* For example, if the following shows up in Emacs:
*
* 40d097: __die at libc/log/die.c:33
* 434daa: __asan_die at libc/intrin/asan.c:483
* 435146: __asan_report_memory_fault at libc/intrin/asan.c:524
* 435b32: __asan_report_store at libc/intrin/asan.c:719
* 43472e: __asan_report_store1 at libc/intrin/somanyasan.S:118
* 40c3a9: GetCipherSuite at net/https/getciphersuite.c:80
* 4383a5: GetCipherSuite_test at test/net/https/getciphersuite.c:23
* ...
*
* Then it's unpleasant to need to press C-x C-n six times.
*/
while ((p2 = memchr(p1, '\n', p3 - p1))) {
if (memmem(p1, p2 - p1, ": __asan_", 9) ||
memmem(p1, p2 - p1, ": __die", 7)) {
memmove(p1, p2 + 1, p3 - (p2 + 1));
p3 -= p2 + 1 - p1;
} else {
p1 = p2 + 1;
break;
}
}
/*
* remove racist output from gnu tooling, that can't be disabled
* otherwise, since it breaks other tools like emacs that aren't
* equipped to ignore it, and what's most problematic is that
* addr2line somehow manages to put the racism onto the one line
* in the backtrace we actually care about.
*/
for (got = p3 - buf, p1 = buf; got;) {
if ((p2 = memmem(p1, got, " (discriminator ",
strlen(" (discriminator ") - 1)) &&
(p3 = memchr(p2, '\n', got - (p2 - p1)))) {

40
libc/log/getcallername.c Normal file
View file

@ -0,0 +1,40 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/bisectcarleft.internal.h"
#include "libc/log/log.h"
#include "libc/nexgen32e/stackframe.h"
#include "libc/runtime/symbols.internal.h"
/**
 * Returns name of function that called caller function.
 *
 * @param bp is the stack frame to look up; if null, the frame linked
 *     from `__builtin_frame_address(0)` via its `next` field is used
 * @return pointer into the symbol table's name area, or null if no
 *     frame or symbol table is available, the table has no symbols, or
 *     the frame's address lies outside the `&_base` .. `&_end` range
 */
const char *GetCallerName(const struct StackFrame *bp) {
  struct SymbolTable *st;
  if (!bp) {
    bp = __builtin_frame_address(0);
    if (bp) bp = bp->next;
  }
  if (!bp) return 0;
  if (!(st = GetSymbolTable())) return 0;
  if (!st->count) return 0;
  /* only addresses inside the program image can be symbolized */
  if ((intptr_t)bp->addr < (intptr_t)&_base ||
      (intptr_t)bp->addr > (intptr_t)&_end) {
    return 0;
  }
  return st->name_base +
         st->symbols[bisectcarleft((const int32_t(*)[2])st->symbols,
                                   st->count, bp->addr - st->addr_base - 1)]
             .name_rva;
}

View file

@ -3,6 +3,8 @@
#include "libc/bits/likely.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/winsize.h"
#include "libc/nexgen32e/stackframe.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § liblog
@ -21,7 +23,7 @@
*/
#ifndef LOGGABLELEVEL
#ifndef TINY
#define LOGGABLELEVEL kLogDebug
#define LOGGABLELEVEL kLogNoise
/* #elif IsTiny() */
/* #define LOGGABLELEVEL kLogInfo */
#else
@ -44,6 +46,7 @@ bool IsTerminalInarticulate(void) nosideeffect;
const char *commandvenv(const char *, const char *);
const char *GetAddr2linePath(void);
const char *GetGdbPath(void);
const char *GetCallerName(const struct StackFrame *);
void showcrashreports(void);
void callexitontermination(struct sigset *);
@ -63,6 +66,7 @@ extern unsigned __log_level; /* log level for runtime check */
#define FATALF(FMT, ...) \
do { \
++ftrace; \
ffatalf(kLogFatal, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
unreachable; \
} while (0)
@ -70,137 +74,174 @@ extern unsigned __log_level; /* log level for runtime check */
#define WARNF(FMT, ...) \
do { \
if (LOGGABLE(kLogWarn)) { \
++ftrace; \
flogf(kLogWarn, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define LOGF(FMT, ...) \
do { \
if (LOGGABLE(kLogInfo)) { \
++ftrace; \
flogf(kLogInfo, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define VERBOSEF(FMT, ...) \
do { \
if (LOGGABLE(kLogVerbose)) { \
++ftrace; \
fverbosef(kLogVerbose, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define DEBUGF(FMT, ...) \
do { \
if (LOGGABLE(kLogDebug)) { \
if (UNLIKELY(LOGGABLE(kLogDebug))) { \
++ftrace; \
fdebugf(kLogDebug, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define NOISEF(FMT, ...) \
do { \
if (LOGGABLE(kLogNoise)) { \
if (UNLIKELY(LOGGABLE(kLogNoise))) { \
++ftrace; \
fnoisef(kLogNoise, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define VFLOG(FMT, VA) \
do { \
if (LOGGABLE(kLogInfo)) { \
++ftrace; \
vflogf(kLogInfo, __FILE__, __LINE__, NULL, FMT, VA); \
--ftrace; \
} \
} while (0)
#define FLOGF(F, FMT, ...) \
do { \
if (LOGGABLE(kLogInfo)) { \
++ftrace; \
flogf(kLogInfo, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define VFLOGF(F, FMT, VA) \
do { \
if (LOGGABLE(kLogInfo)) { \
++ftrace; \
vflogf(kLogInfo, __FILE__, __LINE__, F, FMT, VA); \
--ftrace; \
} \
} while (0)
#define VWARNF(FMT, VA) \
do { \
if (LOGGABLE(kLogWarn)) { \
++ftrace; \
vflogf(kLogWarn, __FILE__, __LINE__, NULL, FMT, VA); \
--ftrace; \
} \
} while (0)
#define FWARNF(F, FMT, ...) \
do { \
if (LOGGABLE(kLogWarn)) { \
++ftrace; \
flogf(kLogWarn, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define VFWARNF(F, FMT, VA) \
do { \
if (LOGGABLE(kLogWarn)) { \
++ftrace; \
vflogf(kLogWarn, __FILE__, __LINE__, F, FMT, VA); \
--ftrace; \
} \
} while (0)
#define VFATALF(FMT, VA) \
do { \
++ftrace; \
vffatalf(kLogFatal, __FILE__, __LINE__, NULL, FMT, VA); \
unreachable; \
} while (0)
#define FFATALF(F, FMT, ...) \
do { \
++ftrace; \
ffatalf(kLogFatal, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
unreachable; \
} while (0)
#define VFFATALF(F, FMT, VA) \
do { \
++ftrace; \
vffatalf(kLogFatal, __FILE__, __LINE__, F, FMT, VA); \
unreachable; \
} while (0)
#define VDEBUGF(FMT, VA) \
do { \
if (LOGGABLE(kLogDebug)) { \
if (UNLIKELY(LOGGABLE(kLogDebug))) { \
++ftrace; \
vfdebugf(kLogDebug, __FILE__, __LINE__, NULL, FMT, VA); \
--ftrace; \
} \
} while (0)
#define FDEBUGF(F, FMT, ...) \
do { \
if (LOGGABLE(kLogDebug)) { \
if (UNLIKELY(LOGGABLE(kLogDebug))) { \
++ftrace; \
fdebugf(kLogDebug, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
#define VFVERBOSEF(F, FMT, VA) \
do { \
if (LOGGABLE(kLogVerbose)) { \
++ftrace; \
vfverbosef(kLogVerbose, __FILE__, __LINE__, F, FMT, VA); \
--ftrace; \
} \
} while (0)
#define VFDEBUGF(F, FMT, VA) \
do { \
if (LOGGABLE(kLogDebug)) { \
++ftrace; \
vfdebugf(kLogDebug, __FILE__, __LINE__, F, FMT, VA); \
--ftrace; \
} \
} while (0)
#define VNOISEF(FMT, VA) \
do { \
if (LOGGABLE(kLogNoise)) { \
if (UNLIKELY(LOGGABLE(kLogNoise))) { \
++ftrace; \
vfnoisef(kLogNoise, __FILE__, __LINE__, NULL, FMT, VA); \
--ftrace; \
} \
} while (0)
#define FNOISEF(F, FMT, ...) \
do { \
if (LOGGABLE(kLogNoise)) { \
if (UNLIKELY(LOGGABLE(kLogNoise))) { \
++ftrace; \
fnoisef(kLogNoise, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
--ftrace; \
} \
} while (0)
@ -208,20 +249,24 @@ extern unsigned __log_level; /* log level for runtime check */
cosmopolitan § liblog » on error resume next
*/
#define LOGIFNEG1(FORM) \
({ \
autotype(FORM) Ax = (FORM); \
if (Ax == (typeof(Ax))(-1) && LOGGABLE(kLogWarn)) { \
__logerrno(__FILE__, __LINE__, #FORM); \
} \
Ax; \
#define LOGIFNEG1(FORM) \
({ \
autotype(FORM) Ax = (FORM); \
if (UNLIKELY(Ax == (typeof(Ax))(-1)) && LOGGABLE(kLogWarn)) { \
++ftrace; \
__logerrno(__FILE__, __LINE__, #FORM); \
--ftrace; \
} \
Ax; \
})
#define LOGIFNULL(FORM) \
({ \
autotype(FORM) Ax = (FORM); \
if (Ax == NULL && LOGGABLE(kLogWarn)) { \
++ftrace; \
__logerrno(__FILE__, __LINE__, #FORM); \
--ftrace; \
} \
Ax; \
})

View file

@ -83,7 +83,6 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
int64_t secs, nsec, dots;
if (!f) f = __log_file;
if (!f) return;
++ftrace;
t2 = nowl();
secs = t2;
nsec = (t2 - secs) * 1e9L;
@ -121,5 +120,4 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
__die();
unreachable;
}
--ftrace;
}

116
libc/nexgen32e/mul4x4adx.S Normal file
View file

@ -0,0 +1,116 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Computes 512-bit product of two 256-bit numbers.
//
// Reads four quadwords at rsi and four quadwords at rdx, and writes
// eight quadwords at rdi, least significant quadword first. This is
// schoolbook multiplication: each row multiplies the rsi operand by
// one quadword of the rdx operand using mulx (BMI2), with the low and
// high halves accumulated on two independent carry chains using adox
// (OF) and adcx (CF) (ADX).
//
// Every load from rsi/rdx is issued before the first store to rdi, so
// the result buffer may overlap either input.
//
// Stack slots: -48(%rbp) holds result word 0, -56(%rbp) result word 1.
// rbx, r12-r15 and rbp are saved on entry and restored on exit.
Mul4x4Adx:
push %rbp
mov %rsp,%rbp
// NOTE(review): .profilable is a macro from libc/macros.internal.h;
// presumably a profiling hook - confirm.
.profilable
push %r15
push %r14
push %r13
push %r12
// keep the rdx operand pointer in r12; mulx uses rdx as its implicit
// multiplicand
mov %rdx,%r12
push %rbx
sub $16,%rsp
// Row 0: rdx = first quadword of the rdx operand; quadwords 0, 2, 3 of
// the rsi operand are cached in rax, r11, r10 (quadword 1 goes to rbx
// below)
mov (%rdx),%rdx
mov (%rsi),%rax
mov 16(%rsi),%r11
mov 24(%rsi),%r10
// zero r13; xor also clears CF and OF before the carry chain starts
xor %r13d,%r13d
mulx %rax,%rbx,%rax
// low half of the first partial product is final: result word 0
mov %rbx,-48(%rbp)
mov 8(%rsi),%rbx
mulx %rbx,%rdx,%rcx
adox %rdx,%rax
// rdx was overwritten by the low half above, so reload the multiplicand
mov (%r12),%rdx
mulx %r11,%rdx,%r9
adox %rdx,%rcx
mov (%r12),%rdx
mulx %r10,%rdx,%r8
adox %rdx,%r9
// fold the final OF carry of row 0 into the top accumulator
adox %r13,%r8
// Row 1: multiply by quadword 1 of the rdx operand; flags re-cleared
xor %r13d,%r13d
mov (%rsi),%r14
mov 8(%r12),%rdx
mulx %r14,%r14,%r15
adox %r14,%rax
adcx %r15,%rcx
// result word 1 is final
mov %rax,-56(%rbp)
mulx %rbx,%r14,%rax
adox %r14,%rcx
adcx %rax,%r9
mulx %r11,%r14,%rax
adox %r14,%r9
adcx %rax,%r8
mulx %r10,%rdx,%rax
adox %rdx,%r8
// Row 2: load quadword 2 of the rdx operand early; the next two
// instructions still fold row 1's CF and OF carries into rax (r13 = 0)
mov 16(%r12),%rdx
adcx %r13,%rax
adox %r13,%rax
mov (%rsi),%r13
xor %r15d,%r15d
mulx %r13,%r13,%r14
adox %r13,%rcx
adcx %r14,%r9
mulx %rbx,%r14,%r13
adox %r14,%r9
adcx %r13,%r8
mulx %r11,%r14,%r13
adox %r14,%r8
adcx %r13,%rax
// last read through rsi; the register is reused as a temporary after this
mov (%rsi),%rsi
mulx %r10,%rdx,%r13
adox %rdx,%rax
adcx %r15,%r13
// Row 3: load quadword 3 of the rdx operand (last read through r12)
mov 24(%r12),%rdx
adox %r15,%r13
mulx %rsi,%r12,%rsi
xor %r14d,%r14d
adox %r12,%r9
adcx %rsi,%r8
mulx %rbx,%rsi,%rbx
adox %rsi,%r8
adcx %rbx,%rax
mulx %r11,%r11,%rsi
// stores to the result are interleaved with the tail of row 3; mov
// does not modify flags, so the carry chains are unaffected
mov -56(%rbp),%rbx
mov %rcx,16(%rdi)
adcx %rsi,%r13
mov -48(%rbp),%rsi
mov %rbx,8(%rdi)
adox %r11,%rax
mov %r9,24(%rdi)
mov %r8,32(%rdi)
mov %rax,40(%rdi)
mulx %r10,%rdx,%r10
adox %rdx,%r13
adcx %r14,%r10
mov %r13,48(%rdi)
adox %r14,%r10
mov %rsi,(%rdi)
mov %r10,56(%rdi)
add $16,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn Mul4x4Adx,globl

182
libc/nexgen32e/mul6x6adx.S Normal file
View file

@ -0,0 +1,182 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Computes 768-bit product of two 384-bit numbers.
//
// Reads six quadwords at rsi and six quadwords at rdx, and writes
// twelve quadwords at rdi, least significant quadword first. Each row
// multiplies the rsi operand (read directly from memory) by one
// quadword of the rdx operand using mulx (BMI2); low and high halves
// are accumulated on separate carry chains with adox (OF) and adcx
// (CF) (ADX).
//
// Every load from rsi/rdx is issued before the first store to rdi, so
// the result buffer may overlap either input.
//
// Stack slots: -48(%rbp), -56(%rbp), -64(%rbp) hold result words 0-2.
// rbx, r12-r15 and rbp are saved on entry and restored on exit.
Mul6x6Adx:
push %rbp
mov %rsp,%rbp
// NOTE(review): .profilable is a macro from libc/macros.internal.h;
// presumably a profiling hook - confirm.
.profilable
push %r15
push %r14
push %r13
push %r12
push %rbx
// keep the rdx operand pointer in rbx; mulx uses rdx as its implicit
// multiplicand
mov %rdx,%rbx
sub $24,%rsp
// Row 0: rdx = quadword 0 of the rdx operand
mov (%rdx),%rdx
// zero r8; xor also clears CF and OF before the carry chain starts
xor %r8d,%r8d
mulx (%rsi),%rcx,%rax
mulx 8(%rsi),%rdx,%r12
// result word 0 is final
mov %rcx,-48(%rbp)
adox %rdx,%rax
// rdx is overwritten by each low half in this row, so it is reloaded
// before every mulx
mov (%rbx),%rdx
mulx 16(%rsi),%rdx,%r15
adox %rdx,%r12
mov (%rbx),%rdx
mulx 24(%rsi),%rdx,%r10
adox %rdx,%r15
mov (%rbx),%rdx
mulx 32(%rsi),%rdx,%r9
adox %rdx,%r10
mov (%rbx),%rdx
mulx 40(%rsi),%rdx,%rcx
adox %rdx,%r9
// Row 1: load quadword 1 of the rdx operand, then fold row 0's final
// OF carry into rcx (r8 = 0)
mov 8(%rbx),%rdx
adox %r8,%rcx
mulx (%rsi),%r13,%r11
xor %r8d,%r8d
adox %r13,%rax
adcx %r11,%r12
// result word 1 is final
mov %rax,-56(%rbp)
mulx 8(%rsi),%r11,%rax
adox %r11,%r12
adcx %rax,%r15
// word 2 accumulator continues in r14
mov %r12,%r14
mulx 16(%rsi),%r11,%rax
adox %r11,%r15
adcx %rax,%r10
mulx 24(%rsi),%r11,%rax
adox %r11,%r10
adcx %rax,%r9
mulx 32(%rsi),%r11,%rax
adox %r11,%r9
adcx %rax,%rcx
mulx 40(%rsi),%rdx,%rax
adox %rdx,%rcx
adcx %r8,%rax
// Row 2: load quadword 2 of the rdx operand, then fold row 1's final
// OF carry into rax
mov 16(%rbx),%rdx
adox %r8,%rax
mulx (%rsi),%r13,%r8
xor %r11d,%r11d
adox %r13,%r14
// result word 2 is final
mov %r14,-64(%rbp)
adcx %r8,%r15
mulx 8(%rsi),%r12,%r8
adox %r12,%r15
adcx %r8,%r10
mulx 16(%rsi),%r12,%r8
adox %r12,%r10
adcx %r8,%r9
mulx 24(%rsi),%r12,%r8
adox %r12,%r9
adcx %r8,%rcx
mulx 32(%rsi),%r12,%r8
adox %r12,%rcx
adcx %r8,%rax
mulx 40(%rsi),%rdx,%r8
adox %rdx,%rax
adcx %r11,%r8
// Row 3: load quadword 3 of the rdx operand, then fold row 2's final
// OF carry into r8
mov 24(%rbx),%rdx
adox %r11,%r8
mulx (%rsi),%r13,%r11
xor %r12d,%r12d
adox %r13,%r15
adcx %r11,%r10
mulx 8(%rsi),%r13,%r11
adox %r13,%r10
adcx %r11,%r9
mulx 16(%rsi),%r13,%r11
adox %r13,%r9
adcx %r11,%rcx
mulx 24(%rsi),%r13,%r11
adox %r13,%rcx
adcx %r11,%rax
mulx 32(%rsi),%r13,%r11
adox %r13,%rax
adcx %r11,%r8
mulx 40(%rsi),%rdx,%r11
adox %rdx,%r8
// Row 4: load quadword 4 of the rdx operand; row 3's final CF and OF
// carries are folded into r11 around the first mulx
mov 32(%rbx),%rdx
adcx %r12,%r11
mulx (%rsi),%r14,%r13
adox %r12,%r11
xor %r12d,%r12d
adox %r14,%r10
adcx %r13,%r9
mulx 8(%rsi),%r14,%r13
adox %r14,%r9
adcx %r13,%rcx
mulx 16(%rsi),%r14,%r13
adox %r14,%rcx
adcx %r13,%rax
mulx 24(%rsi),%r14,%r13
adox %r14,%rax
adcx %r13,%r8
mulx 32(%rsi),%r14,%r13
adox %r14,%r8
adcx %r13,%r11
mulx 40(%rsi),%rdx,%r13
adox %rdx,%r11
adcx %r12,%r13
// Row 5: load quadword 5 of the rdx operand (last read through rbx,
// which is reused as a temporary below)
mov 40(%rbx),%rdx
adox %r12,%r13
mulx (%rsi),%r14,%rbx
xor %r12d,%r12d
adox %r14,%r9
adcx %rbx,%rcx
mulx 8(%rsi),%r14,%rbx
adox %r14,%rcx
adcx %rbx,%rax
mulx 16(%rsi),%r14,%rbx
adox %r14,%rax
adcx %rbx,%r8
mulx 24(%rsi),%r14,%rbx
adox %r14,%r8
adcx %rbx,%r11
mulx 32(%rsi),%r14,%rbx
// final partial product: last read through rsi; high half lands in rdx
mulx 40(%rsi),%rsi,%rdx
adox %r14,%r11
adcx %rbx,%r13
adox %rsi,%r13
adcx %r12,%rdx
adox %r12,%rdx
// write out all twelve result words; words 0-2 come from the stack
mov -48(%rbp),%rsi
mov -56(%rbp),%rbx
mov %r15,24(%rdi)
mov -64(%rbp),%r14
mov %r13,80(%rdi)
mov %rbx,8(%rdi)
mov %r14,16(%rdi)
mov %rsi,(%rdi)
mov %r10,32(%rdi)
mov %r9,40(%rdi)
mov %rcx,48(%rdi)
mov %rax,56(%rdi)
mov %r8,64(%rdi)
mov %r11,72(%rdi)
mov %rdx,88(%rdi)
add $24,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn Mul6x6Adx,globl

483
libc/nexgen32e/mul8x8.S Normal file
View file

@ -0,0 +1,483 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
/ Computes 1024-bit product of 512-bit and 512-bit numbers.
/
/ Instructions: 262
/ Total Cycles: 114
/ Total uOps: 469
/ Dispatch Width: 6
/ uOps Per Cycle: 4.11
/ IPC: 2.30
/ Block RThroughput: 78.2
/
/ @param rdi receives 16 quadword result
/ @param rsi is left hand side which must have 8 quadwords
/ @param rdx is right hand side which must have 8 quadwords
/ @note words are host endian while array is little endian
/ @mayalias
// Implementation notes: each of the eight rows multiplies the rsi
// operand (read directly from memory) by one quadword of the rdx
// operand using mulx; low halves are accumulated on the adox (OF)
// chain and high halves on the adcx (CF) chain. rdi is reused as an
// accumulator, so the result pointer is parked at -48(%rbp). Result
// words 0-6 are spilled to -56(%rbp) .. -104(%rbp) as they become
// final. Every load from rsi/rdx is issued before the first store to
// the result buffer.
Mul8x8Adx:
push %rbp
mov %rsp,%rbp
// NOTE(review): .profilable is a macro from libc/macros.internal.h;
// presumably a profiling hook - confirm.
.profilable
push %r15
push %r14
push %r13
push %r12
// keep the rdx operand pointer in r12; mulx uses rdx as its implicit
// multiplicand
mov %rdx,%r12
push %rbx
sub $64,%rsp
// Row 0: rdx = quadword 0 of the rdx operand
mov (%rdx),%rdx
// zero r13; xor also clears CF and OF before the carry chain starts
xor %r13d,%r13d
mulx (%rsi),%rax,%rcx
// save the result pointer; rdi becomes an accumulator below
mov %rdi,-48(%rbp)
// result word 0 is final
mov %rax,-56(%rbp)
mulx 8(%rsi),%rdx,%rax
adox %rdx,%rcx
// rdx is overwritten by each low half in this row, so it is reloaded
// before every mulx
mov (%r12),%rdx
mulx 16(%rsi),%rdx,%rbx
adox %rdx,%rax
mov (%r12),%rdx
mulx 24(%rsi),%rdx,%r11
adox %rdx,%rbx
mov (%r12),%rdx
mulx 32(%rsi),%rdx,%r10
adox %rdx,%r11
mov (%r12),%rdx
mulx 40(%rsi),%rdx,%r9
adox %rdx,%r10
mov (%r12),%rdx
mulx 48(%rsi),%rdx,%r8
adox %rdx,%r9
mov (%r12),%rdx
mulx 56(%rsi),%rdx,%rdi
adox %rdx,%r8
// fold the final OF carry of row 0 into the top accumulator
adox %r13,%rdi
xor %r13d,%r13d
// Row 1: multiply by quadword 1 of the rdx operand
mov 8(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rcx
adcx %r14,%rax
// result word 1 is final
mov %rcx,-64(%rbp)
mulx 8(%rsi),%r14,%rcx
adox %r14,%rax
adcx %rcx,%rbx
mulx 16(%rsi),%r14,%rcx
adox %r14,%rbx
adcx %rcx,%r11
mulx 24(%rsi),%r14,%rcx
adox %r14,%r11
adcx %rcx,%r10
mulx 32(%rsi),%r14,%rcx
adox %r14,%r10
adcx %rcx,%r9
mulx 40(%rsi),%r14,%rcx
adox %r14,%r9
adcx %rcx,%r8
mulx 48(%rsi),%r14,%rcx
adox %r14,%r8
adcx %rcx,%rdi
mulx 56(%rsi),%rdx,%rcx
adox %rdx,%rdi
adcx %r13,%rcx
// Row 2: load quadword 2 of the rdx operand, then fold row 1's final
// OF carry into rcx (r13 = 0)
mov 16(%r12),%rdx
adox %r13,%rcx
mulx (%rsi),%r15,%r14
xor %r13d,%r13d
adox %r15,%rax
adcx %r14,%rbx
// result word 2 is final
mov %rax,-72(%rbp)
mulx 8(%rsi),%r14,%rax
adox %r14,%rbx
adcx %rax,%r11
mulx 16(%rsi),%r14,%rax
adox %r14,%r11
adcx %rax,%r10
mulx 24(%rsi),%r14,%rax
adox %r14,%r10
adcx %rax,%r9
mulx 32(%rsi),%r14,%rax
adox %r14,%r9
adcx %rax,%r8
mulx 40(%rsi),%r14,%rax
adox %r14,%r8
adcx %rax,%rdi
mulx 48(%rsi),%r14,%rax
adox %r14,%rdi
adcx %rax,%rcx
mulx 56(%rsi),%rdx,%rax
adox %rdx,%rcx
adcx %r13,%rax
adox %r13,%rax
xor %r13d,%r13d
// Row 3: multiply by quadword 3 of the rdx operand
mov 24(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rbx
adcx %r14,%r11
// result word 3 is final
mov %rbx,-80(%rbp)
// word 4 accumulator continues in r15
mov %r11,%r15
mulx 8(%rsi),%r14,%rbx
adox %r14,%r15
adcx %rbx,%r10
mulx 16(%rsi),%rbx,%r11
adox %rbx,%r10
adcx %r11,%r9
mulx 24(%rsi),%rbx,%r11
adox %rbx,%r9
adcx %r11,%r8
mulx 32(%rsi),%rbx,%r11
adox %rbx,%r8
adcx %r11,%rdi
mulx 40(%rsi),%rbx,%r11
adox %rbx,%rdi
adcx %r11,%rcx
mulx 48(%rsi),%rbx,%r11
adox %rbx,%rcx
adcx %r11,%rax
mulx 56(%rsi),%rdx,%r11
adox %rdx,%rax
adcx %r13,%r11
// Row 4: load quadword 4 of the rdx operand, then fold row 3's final
// OF carry into r11
mov 32(%r12),%rdx
adox %r13,%r11
xor %ebx,%ebx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r10
// result word 4 is final
mov %r15,-88(%rbp)
mulx 8(%rsi),%r14,%r13
// word 5 accumulator continues in r15
mov %r10,%r15
adcx %r13,%r9
adox %r14,%r15
mulx 16(%rsi),%r13,%r10
adox %r13,%r9
adcx %r10,%r8
mulx 24(%rsi),%r13,%r10
adcx %r10,%rdi
adox %r13,%r8
mulx 32(%rsi),%r13,%r10
adox %r13,%rdi
adcx %r10,%rcx
mulx 40(%rsi),%r13,%r10
adox %r13,%rcx
adcx %r10,%rax
mulx 48(%rsi),%r13,%r10
adox %r13,%rax
adcx %r10,%r11
mulx 56(%rsi),%rdx,%r10
adox %rdx,%r11
adcx %rbx,%r10
// Row 5: load quadword 5 of the rdx operand, then fold row 4's final
// OF carry into r10 (rbx = 0)
mov 40(%r12),%rdx
adox %rbx,%r10
mulx (%rsi),%r14,%r13
xor %ebx,%ebx
adox %r14,%r15
// result word 5 is final
mov %r15,-96(%rbp)
adcx %r13,%r9
mulx 8(%rsi),%r14,%r13
// word 6 accumulator continues in r15
mov %r9,%r15
adox %r14,%r15
adcx %r13,%r8
mulx 16(%rsi),%r13,%r9
adox %r13,%r8
adcx %r9,%rdi
mulx 24(%rsi),%r13,%r9
adox %r13,%rdi
adcx %r9,%rcx
mulx 32(%rsi),%r13,%r9
adox %r13,%rcx
adcx %r9,%rax
mulx 40(%rsi),%r13,%r9
adox %r13,%rax
adcx %r9,%r11
mulx 48(%rsi),%r13,%r9
adox %r13,%r11
adcx %r9,%r10
mulx 56(%rsi),%rdx,%r9
adox %rdx,%r10
adcx %rbx,%r9
adox %rbx,%r9
xor %ebx,%ebx
// Row 6: multiply by quadword 6 of the rdx operand
mov 48(%r12),%rdx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r8
// result word 6 is final
mov %r15,-104(%rbp)
mulx 8(%rsi),%r14,%r13
// word 7 accumulator continues in r15 and stays in a register
mov %r8,%r15
adcx %r13,%rdi
adox %r14,%r15
mulx 16(%rsi),%r13,%r8
adox %r13,%rdi
adcx %r8,%rcx
mulx 24(%rsi),%r13,%r8
adox %r13,%rcx
adcx %r8,%rax
mulx 32(%rsi),%r13,%r8
adox %r13,%rax
adcx %r8,%r11
mulx 40(%rsi),%r13,%r8
adox %r13,%r11
adcx %r8,%r10
mulx 48(%rsi),%r13,%r8
adox %r13,%r10
adcx %r8,%r9
mulx 56(%rsi),%rdx,%r8
adox %rdx,%r9
// Row 7: load quadword 7 of the rdx operand (last read through r12,
// which is reused as a temporary below); row 6's final CF and OF
// carries are folded into r8 around the first mulx
mov 56(%r12),%rdx
adcx %rbx,%r8
mulx (%rsi),%r13,%r12
adox %rbx,%r8
xor %ebx,%ebx
adox %r13,%r15
adcx %r12,%rdi
mulx 8(%rsi),%r13,%r12
adox %r13,%rdi
adcx %r12,%rcx
mulx 16(%rsi),%r13,%r12
adox %r13,%rcx
adcx %r12,%rax
mulx 24(%rsi),%r13,%r12
adox %r13,%rax
adcx %r12,%r11
mulx 32(%rsi),%r13,%r12
adox %r13,%r11
adcx %r12,%r10
mulx 40(%rsi),%r13,%r12
adox %r13,%r10
adcx %r12,%r9
mulx 48(%rsi),%r13,%r12
// final partial product: last read through rsi; high half lands in rdx
mulx 56(%rsi),%rsi,%rdx
adox %r13,%r9
adcx %r12,%r8
adox %rsi,%r8
adcx %rbx,%rdx
mov -64(%rbp),%rsi
adox %rbx,%rdx
// recover the result pointer and write out all sixteen words; words
// 0-6 come from the stack, the rest from registers
mov -48(%rbp),%rbx
mov -56(%rbp),%r14
mov %rsi,8(%rbx)
mov -72(%rbp),%rsi
mov %r14,(%rbx)
mov %rsi,16(%rbx)
mov -80(%rbp),%rsi
mov %rsi,24(%rbx)
mov -88(%rbp),%rsi
mov %rsi,32(%rbx)
mov -96(%rbp),%rsi
mov %rsi,40(%rbx)
mov -104(%rbp),%rsi
mov %r15,56(%rbx)
mov %rsi,48(%rbx)
mov %rdi,64(%rbx)
mov %rcx,72(%rbx)
mov %rax,80(%rbx)
mov %r11,88(%rbx)
mov %r10,96(%rbx)
mov %r9,104(%rbx)
mov %r8,112(%rbx)
mov %rdx,120(%rbx)
add $64,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn Mul8x8Adx,globl
.end
Timeline view: 0123456789 0123456789 0123456789 0123456789
Index 0123456789 0123456789 0123456789 0123456789
[0,0] DeeER. . . . . . . . . . . . . . . . pushq %r15
[0,1] D==eeER . . . . . . . . . . . . . . . pushq %r14
[0,2] .D===eeER . . . . . . . . . . . . . . . pushq %r13
[0,3] .D=====eeER . . . . . . . . . . . . . . pushq %r12
[0,4] . DeE-----R . . . . . . . . . . . . . . movq %rdx, %r12
[0,5] . D======eeER . . . . . . . . . . . . . . pushq %rbx
[0,6] . D========eER . . . . . . . . . . . . . . subq $64, %rsp
[0,7] . DeeeeeE----R . . . . . . . . . . . . . . movq (%rdx), %rdx
[0,8] . D---------R . . . . . . . . . . . . . . xorl %r13d, %r13d
[0,9] . D====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx
[0,10] . D======eE------R . . . . . . . . . . . . . movq %rdi, -48(%rbp)
[0,11] . D======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp)
[0,12] . D====eeeeeeeeeER. . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax
[0,13] . D============eER. . . . . . . . . . . . . adoxq %rdx, %rcx
[0,14] . DeeeeeE-------R. . . . . . . . . . . . . movq (%r12), %rdx
[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx
[0,16] . D============eE-R . . . . . . . . . . . . adoxq %rdx, %rax
[0,17] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,18] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11
[0,19] . .D=============eER . . . . . . . . . . . . adoxq %rdx, %rbx
[0,20] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,21] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10
[0,22] . . D=============eER . . . . . . . . . . . . adoxq %rdx, %r11
[0,23] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,24] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9
[0,25] . . D=============eER. . . . . . . . . . . . adoxq %rdx, %r10
[0,26] . . DeeeeeE--------R. . . . . . . . . . . . movq (%r12), %rdx
[0,27] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8
[0,28] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r9
[0,29] . . DeeeeeE--------R . . . . . . . . . . . movq (%r12), %rdx
[0,30] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi
[0,31] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r8
[0,32] . . .D=============eER . . . . . . . . . . . adoxq %r13, %rdi
[0,33] . . .D---------------R . . . . . . . . . . . xorl %r13d, %r13d
[0,34] . . .DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx
[0,35] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14
[0,36] . . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx
[0,37] . . . D=============eER . . . . . . . . . . . adcxq %r14, %rax
[0,38] . . . D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp)
[0,39] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx
[0,40] . . . D=============eER. . . . . . . . . . . adoxq %r14, %rax
[0,41] . . . D=============eER . . . . . . . . . . adcxq %rcx, %rbx
[0,42] . . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx
[0,43] . . . D==============eER . . . . . . . . . . adoxq %r14, %rbx
[0,44] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11
[0,45] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx
[0,46] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11
[0,47] . . . .D===============eER. . . . . . . . . . adcxq %rcx, %r10
[0,48] . . . .D====eeeeeeeeeE---R. . . . . . . . . . mulxq 32(%rsi), %r14, %rcx
[0,49] . . . .D================eER . . . . . . . . . adoxq %r14, %r10
[0,50] . . . . D================eER . . . . . . . . . adcxq %rcx, %r9
[0,51] . . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %r14, %rcx
[0,52] . . . . D=================eER . . . . . . . . . adoxq %r14, %r9
[0,53] . . . . D=================eER . . . . . . . . . adcxq %rcx, %r8
[0,54] . . . . D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx
[0,55] . . . . D==================eER. . . . . . . . . adoxq %r14, %r8
[0,56] . . . . D==================eER . . . . . . . . adcxq %rcx, %rdi
[0,57] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 56(%rsi), %rdx, %rcx
[0,58] . . . . D===================eER . . . . . . . . adoxq %rdx, %rdi
[0,59] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx
[0,60] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx
[0,61] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx
[0,62] . . . . .D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14
[0,63] . . . . .D---------------------R . . . . . . . . xorl %r13d, %r13d
[0,64] . . . . .D=======eE------------R . . . . . . . . adoxq %r15, %rax
[0,65] . . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx
[0,66] . . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp)
[0,67] . . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax
[0,68] . . . . . D============eE-----R . . . . . . . . adoxq %r14, %rbx
[0,69] . . . . . D=============eE----R . . . . . . . . adcxq %rax, %r11
[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax
[0,71] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11
[0,72] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10
[0,73] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax
[0,74] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10
[0,75] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9
[0,76] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax
[0,77] . . . . . .D===============eER. . . . . . . . adoxq %r14, %r9
[0,78] . . . . . .D================eER . . . . . . . adcxq %rax, %r8
[0,79] . . . . . .D====eeeeeeeeeE----R . . . . . . . mulxq 40(%rsi), %r14, %rax
[0,80] . . . . . . D================eER . . . . . . . adoxq %r14, %r8
[0,81] . . . . . . D=================eER . . . . . . . adcxq %rax, %rdi
[0,82] . . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax
[0,83] . . . . . . D=================eER . . . . . . . adoxq %r14, %rdi
[0,84] . . . . . . D==================eER. . . . . . . adcxq %rax, %rcx
[0,85] . . . . . . D====eeeeeeeeeE------R. . . . . . . mulxq 56(%rsi), %rdx, %rax
[0,86] . . . . . . D==================eER . . . . . . adoxq %rdx, %rcx
[0,87] . . . . . . D===================eER . . . . . . adcxq %r13, %rax
[0,88] . . . . . . D====================eER . . . . . . adoxq %r13, %rax
[0,89] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d
[0,90] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx
[0,91] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14
[0,92] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx
[0,93] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11
[0,94] . . . . . . .D===========eE-------R . . . . . . movq %rbx, -80(%rbp)
[0,95] . . . . . . .D=============eE-----R . . . . . . movq %r11, %r15
[0,96] . . . . . . .D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx
[0,97] . . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15
[0,98] . . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10
[0,99] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11
[0,100] . . . . . . . D==============eE--R . . . . . . adoxq %rbx, %r10
[0,101] . . . . . . . D===============eE-R . . . . . . adcxq %r11, %r9
[0,102] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11
[0,103] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9
[0,104] . . . . . . . D================eER . . . . . . adcxq %r11, %r8
[0,105] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11
[0,106] . . . . . . . D================eER. . . . . . adoxq %rbx, %r8
[0,107] . . . . . . . D=================eER . . . . . adcxq %r11, %rdi
[0,108] . . . . . . . D====eeeeeeeeeE-----R . . . . . mulxq 40(%rsi), %rbx, %r11
[0,109] . . . . . . . .D=================eER . . . . . adoxq %rbx, %rdi
[0,110] . . . . . . . .D==================eER . . . . . adcxq %r11, %rcx
[0,111] . . . . . . . .D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11
[0,112] . . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx
[0,113] . . . . . . . . D===================eER. . . . . adcxq %r11, %rax
[0,114] . . . . . . . . D====eeeeeeeeeE-------R. . . . . mulxq 56(%rsi), %rdx, %r11
[0,115] . . . . . . . . D===================eER . . . . adoxq %rdx, %rax
[0,116] . . . . . . . . D====================eER . . . . adcxq %r13, %r11
[0,117] . . . . . . . . DeeeeeE----------------R . . . . movq 32(%r12), %rdx
[0,118] . . . . . . . . D=====================eER . . . . adoxq %r13, %r11
[0,119] . . . . . . . . D=====E-----------------R . . . . xorl %ebx, %ebx
[0,120] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13
[0,121] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15
[0,122] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10
[0,123] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp)
[0,124] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13
[0,125] . . . . . . . . D=============eE------R . . . . movq %r10, %r15
[0,126] . . . . . . . . .D============eE------R . . . . adcxq %r13, %r9
[0,127] . . . . . . . . .D=============eE-----R . . . . adoxq %r14, %r15
[0,128] . . . . . . . . .D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10
[0,129] . . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9
[0,130] . . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8
[0,131] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10
[0,132] . . . . . . . . . D==============eE--R . . . . adcxq %r10, %rdi
[0,133] . . . . . . . . . D===============eE-R . . . . adoxq %r13, %r8
[0,134] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10
[0,135] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi
[0,136] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx
[0,137] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10
[0,138] . . . . . . . . . D================eER. . . . adoxq %r13, %rcx
[0,139] . . . . . . . . . D=================eER . . . adcxq %r10, %rax
[0,140] . . . . . . . . . D====eeeeeeeeeE-----R . . . mulxq 48(%rsi), %r13, %r10
[0,141] . . . . . . . . . .D=================eER . . . adoxq %r13, %rax
[0,142] . . . . . . . . . .D==================eER . . . adcxq %r10, %r11
[0,143] . . . . . . . . . .D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10
[0,144] . . . . . . . . . . D==================eER . . . adoxq %rdx, %r11
[0,145] . . . . . . . . . . D===================eER. . . adcxq %rbx, %r10
[0,146] . . . . . . . . . . DeeeeeE---------------R. . . movq 40(%r12), %rdx
[0,147] . . . . . . . . . . D====================eER . . adoxq %rbx, %r10
[0,148] . . . . . . . . . . D====eeeeeeeeeE-------R . . mulxq (%rsi), %r14, %r13
[0,149] . . . . . . . . . . D---------------------R . . xorl %ebx, %ebx
[0,150] . . . . . . . . . . D============eE-------R . . adoxq %r14, %r15
[0,151] . . . . . . . . . . D============eE------R . . movq %r15, -96(%rbp)
[0,152] . . . . . . . . . . D============eE------R . . adcxq %r13, %r9
[0,153] . . . . . . . . . . D=====eeeeeeeeeE-----R . . mulxq 8(%rsi), %r14, %r13
[0,154] . . . . . . . . . . D============eE-----R . . movq %r9, %r15
[0,155] . . . . . . . . . . D=============eE----R . . adoxq %r14, %r15
[0,156] . . . . . . . . . . D==============eE---R . . adcxq %r13, %r8
[0,157] . . . . . . . . . . .D====eeeeeeeeeE----R . . mulxq 16(%rsi), %r13, %r9
[0,158] . . . . . . . . . . .D==============eE--R . . adoxq %r13, %r8
[0,159] . . . . . . . . . . .D===============eE-R . . adcxq %r9, %rdi
[0,160] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 24(%rsi), %r13, %r9
[0,161] . . . . . . . . . . . D===============eER . . adoxq %r13, %rdi
[0,162] . . . . . . . . . . . D================eER . . adcxq %r9, %rcx
[0,163] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 32(%rsi), %r13, %r9
[0,164] . . . . . . . . . . . D================eER . . adoxq %r13, %rcx
[0,165] . . . . . . . . . . . D=================eER . . adcxq %r9, %rax
[0,166] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9
[0,167] . . . . . . . . . . . D=================eER. . adoxq %r13, %rax
[0,168] . . . . . . . . . . . D==================eER . adcxq %r9, %r11
[0,169] . . . . . . . . . . . D====eeeeeeeeeE-----R . mulxq 48(%rsi), %r13, %r9
[0,170] . . . . . . . . . . . D==================eER . adoxq %r13, %r11
[0,171] . . . . . . . . . . . D===================eER . adcxq %r9, %r10
[0,172] . . . . . . . . . . . .D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9
[0,173] . . . . . . . . . . . .D===================eER. adoxq %rdx, %r10
[0,174] . . . . . . . . . . . .D====================eER adcxq %rbx, %r9

View file

@ -104,7 +104,6 @@ privileged noasan void ftracer(void) {
p = mempcpy(p, symbol, symbolsize);
*p++ = ' ';
p += uint64toarray_radix10((stamp - laststamp) / 3.3, p);
*p++ = '\r';
*p++ = '\n';
write(2, g_buf, p - g_buf);
}

View file

@ -0,0 +1,25 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_APPEND_INTERNAL_H_
#define COSMOPOLITAN_LIBC_STDIO_APPEND_INTERNAL_H_
#define APPEND_COOKIE 21578
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*
 * Bookkeeping for an append buffer, as returned by appendz().
 * Both fields are zero when the buffer pointer is NULL.
 */
struct appendz {
size_t i; /* data size */
size_t n; /* allocation size */
};
/* Appends formatted data to the buffer at *b (variadic form). */
int appendf(char **, const char *, ...);
/* Appends formatted data to the buffer at *b (va_list form). */
int vappendf(char **, const char *, va_list);
/* Appends a NUL-terminated string to the buffer at *b. */
int appends(char **, const char *);
/* Appends raw data of the given byte length to the buffer at *b. */
int appendd(char **, const void *, size_t);
/* Returns the data size and allocation size of an append buffer. */
struct appendz appendz(char *);
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define appendf(BUF, FMT, ...) (appendf)(BUF, PFLINK(FMT), ##__VA_ARGS__)
#define vappendf(BUF, FMT, VA) (vappendf)(BUF, PFLINK(FMT), VA)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STDIO_APPEND_INTERNAL_H_ */

54
libc/stdio/appendd.c Normal file
View file

@ -0,0 +1,54 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/stdio/append.internal.h"
#include "libc/str/str.h"
#define W sizeof(size_t)
/**
 * Appends raw data to buffer.
 *
 * The buffer is grown with realloc() when the new data, a trailing NUL
 * byte, and the length word kept in the last `W` bytes of the
 * allocation would not otherwise fit. Exactly `l` bytes are read from
 * `s`; the NUL terminator is written separately, so `s` does not need
 * to be NUL-terminated or have a readable byte at `s[l]`.
 *
 * @param b points to the buffer pointer, which may be NULL to start a
 *     new buffer; it is updated if the buffer gets reallocated
 * @param s is the data to append, which may be NULL only if `l` is 0
 * @param l is the number of bytes to copy from `s`
 * @return `l` (converted to int) on success, or -1 if realloc() fails,
 *     in which case `*b` is left unchanged
 */
int appendd(char **b, const void *s, size_t l) {
  char *p;
  size_t need;
  struct appendz z;
  z = appendz((p = *b));
  /* payload + NUL, padded to 8 bytes, followed by the length word */
  need = ROUNDUP(z.i + l + 1, 8) + W;
  if (need > z.n) {
    if (!z.n) z.n = W * 2;
    while (need > z.n) z.n += z.n >> 1; /* grow by 1.5x */
    z.n = ROUNDUP(z.n, W);
    if ((p = realloc(p, z.n))) {
      /* the length word lives at the end of the usable allocation */
      z.n = malloc_usable_size(p);
      assert(!(z.n & (W - 1)));
      *b = p;
    } else {
      return -1;
    }
  }
  /* copy only what the caller gave us, then terminate explicitly */
  if (l) memcpy(p + z.i, s, l);
  p[z.i + l] = 0;
  z.i += l;
  if (!IsTiny() && W == 8) {
    /* tag the upper 16 bits so appendz() can sanity check the word */
    z.i |= (size_t)APPEND_COOKIE << 48;
  }
  *(size_t *)(p + z.n - W) = z.i;
  return l;
}

31
libc/stdio/appendf.c Normal file
View file

@ -0,0 +1,31 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/stdio/append.internal.h"
/**
 * Appends formatted data to buffer.
 *
 * Collects the variadic arguments into a va_list and hands them to
 * vappendf(), whose result is returned unchanged.
 *
 * @param b points to the buffer pointer
 * @param fmt is the format string
 * @return whatever vappendf() returns
 */
int(appendf)(char **b, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  const int result = (vappendf)(b, fmt, args);
  va_end(args);
  return result;
}

27
libc/stdio/appends.c Normal file
View file

@ -0,0 +1,27 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/stdio/append.internal.h"
#include "libc/str/str.h"
/**
 * Appends string to buffer.
 *
 * Measures `s` with strlen() and forwards it to appendd().
 *
 * @param b points to the buffer pointer
 * @param s is a NUL-terminated string
 * @return whatever appendd() returns
 */
int appends(char **b, const char *s) {
  size_t len;
  len = strlen(s);
  return appendd(b, s, len);
}

45
libc/stdio/appendz.c Normal file
View file

@ -0,0 +1,45 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/mem/mem.h"
#include "libc/stdio/append.internal.h"
#define W sizeof(size_t)
/**
 * Returns size of append buffer.
 *
 * The data size is read from the last `W` bytes of the allocation,
 * whose extent is obtained from malloc_usable_size(). On 64-bit
 * non-tiny builds the upper 16 bits of that word are checked against
 * APPEND_COOKIE and then masked off.
 *
 * @param p is the append buffer, or NULL
 * @return struct with `i` (data size) and `n` (allocation size), both
 *     zero when `p` is NULL
 */
struct appendz appendz(char *p) {
  struct appendz z;
  if (!p) {
    z.i = 0;
    z.n = 0;
    return z;
  }
  z.n = malloc_usable_size(p);
  assert(z.n >= W * 2 && !(z.n & (W - 1)));
  z.i = *(size_t *)(p + z.n - W);
  if (!IsTiny() && W == 8) {
    assert((z.i >> 48) == APPEND_COOKIE);
    z.i &= 0x0000ffffffffffff;
  }
  assert(z.n >= z.i);
  return z;
}

View file

@ -36,9 +36,15 @@ static textstartup void __stdout_init() {
struct FILE *sf;
sf = stdout;
asm("" : "+r"(sf));
if (IsWindows() || ischardev(pushpop(sf->fd))) {
sf->bufmode = _IOLBF;
}
/*
* Unlike other C libraries we don't bother calling fstat() to check
* if stdio is a character device and we instead choose to always line
* buffer it. We need it because there's no way to use the unbuffer
* command on a statically linked binary. This still goes fast. We
* value latency more than throughput, and stdio isn't the best api
* when the goal is throughput.
*/
sf->bufmode = _IOLBF;
__fflush_register(sf);
}

59
libc/stdio/vappendf.c Normal file
View file

@ -0,0 +1,59 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/fmt/fmt.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/stdio/append.internal.h"
#define W sizeof(size_t)
/**
 * Appends formatted data to buffer (va_list form).
 *
 * The text is first formatted into whatever room the buffer already
 * has. If the result, its NUL terminator, and the trailing length word
 * do not fit, the buffer is grown with realloc() and the text is
 * formatted a second time using a copy of the argument list.
 *
 * @param b points to the buffer pointer, updated if reallocated
 * @param f is the format string
 * @param v is the argument list
 * @return the vsnprintf() result (negative values are passed through),
 *     or -1 if realloc() fails
 */
int(vappendf)(char **b, const char *f, va_list v) {
  char *buf;
  int len, again;
  size_t room, need;
  va_list retry;
  struct appendz z;
  buf = *b;
  z = appendz(buf);
  va_copy(retry, v); /* kept in case a second formatting pass is needed */
  room = z.n ? z.n - W - z.i : 0;
  len = (vsnprintf)(buf + z.i, room, f, v);
  if (len < 0) {
    va_end(retry);
    return len;
  }
  /* payload + NUL, padded to 8 bytes, followed by the length word */
  need = ROUNDUP(z.i + len + 1, 8) + W;
  if (need > z.n) {
    if (!z.n) z.n = W * 2;
    while (need > z.n) z.n += z.n >> 1;
    z.n = ROUNDUP(z.n, W);
    buf = realloc(buf, z.n);
    if (!buf) {
      va_end(retry);
      return -1;
    }
    z.n = malloc_usable_size(buf);
    assert(!(z.n & (W - 1)));
    again = (vsnprintf)(buf + z.i, z.n - W - z.i, f, retry);
    assert(again == len);
    *b = buf;
  }
  z.i += len;
  if (!IsTiny() && W == 8) z.i |= (size_t)APPEND_COOKIE << 48;
  *(size_t *)(buf + z.n - W) = z.i;
  va_end(retry);
  return len;
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/str/str.h"
/**
@ -25,10 +26,23 @@
int memcasecmp(const void *p, const void *q, size_t n) {
int c;
size_t i;
uint64_t w;
const unsigned char *a, *b;
if ((a = p) != (b = q)) {
for (i = 0; i < n; ++i) {
if ((c = kToLower[a[i]] - kToLower[b[i]])) {
while (i + 8 <= n) {
w = READ64LE(a);
w ^= READ64LE(b);
if (w) {
i += (unsigned)__builtin_ctzll(w) >> 3;
break;
} else {
i += 8;
}
}
if (i == n) {
break;
} else if ((c = kToLower[a[i]] - kToLower[b[i]])) {
return c;
}
}