Polish greenbean example a bit more

Windows support for this example is still a work in progress. It's
encountering some unusual crashes. Thank you Chris Wellons for the cool
synchronization code too!
This commit is contained in:
Justine Tunney 2022-05-15 09:14:48 -07:00
parent e5e141d9b5
commit 91ee2b19d4
4 changed files with 145 additions and 81 deletions

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/sigbits.h"
#include "libc/calls/struct/sigset.h"
@ -27,6 +28,7 @@
#include "libc/log/check.h"
#include "libc/log/log.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/goodsocket.internal.h"
#include "libc/sock/sock.h"
#include "libc/str/str.h"
@ -71,25 +73,25 @@
* Like redbean, greenbean has superior performance too, with an
* advantage on benchmarks biased towards high connection counts
*
* $ sudo wrk -c 300 -t 32 --latency http://127.0.0.1:8080/
* Running 10s test @ http://127.0.0.1:8080/
* $ sudo wrk -c 300 -t 32 --latency http://10.10.10.124:8080/
* Running 10s test @ http://10.10.10.124:8080/
* 32 threads and 300 connections
* Thread Stats Avg Stdev Max +/- Stdev
* Latency 36.21us 133.39us 8.10ms 98.52%
* Req/Sec 73.24k 28.92k 131.06k 47.49%
* Latency 1.07ms 8.27ms 138.55ms 98.58%
* Req/Sec 37.98k 12.61k 117.65k 80.11%
* Latency Distribution
* 50% 22.00us
* 75% 29.00us
* 90% 40.00us
* 99% 333.00us
* 4356560 requests in 4.62s, 1.29GB read
* Requests/sec: 942663.73
* Transfer/sec: 284.98MB
* 50% 200.00us
* 75% 227.00us
* 90% 303.00us
* 99% 32.46ms
* 10033090 requests in 8.31s, 2.96GB read
* Requests/sec: 1207983.58
* Transfer/sec: 365.19MB
*
*/
#define THREADS 32
#define HEARTBEAT 500
#define THREADS 512
#define HEARTBEAT 100
#define KEEPALIVE 5000
#define LOGGING 0
@ -98,23 +100,106 @@
"Referrer-Policy: origin\r\n" \
"Cache-Control: private; max-age=0\r\n"
int workers;
int barrier;
////////////////////////////////////////////////////////////////////////////////
// BEGIN: Chris Wellons's Public Domain GNU Atomics Library
#define BARRIER_INC(x) __atomic_add_fetch(x, 1, __ATOMIC_SEQ_CST)
#define BARRIER_GET(x) __atomic_load_n(x, __ATOMIC_SEQ_CST)
#define ATOMIC_LOAD(q) __atomic_load_n(q, __ATOMIC_ACQUIRE)
#define ATOMIC_RLOAD(q) __atomic_load_n(q, __ATOMIC_RELAXED)
#define ATOMIC_STORE(q, v) __atomic_store_n(q, v, __ATOMIC_RELEASE)
#define ATOMIC_ADD(q, c) __atomic_add_fetch(q, c, __ATOMIC_RELEASE)
#define ATOMIC_AND(q, m) __atomic_and_fetch(q, m, __ATOMIC_RELEASE)
#define ATOMIC_CAS(q, e, d) \
__atomic_compare_exchange_n(q, e, d, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED)
// Return the array index for then next value to be pushed. The size of this
// array must be (1 << exp) elements. Write the value into this array index,
// then commit it. With a single-consumer queue, this element store need not
// be atomic. The value will appear in the queue after the commit. Returns
// -1 if the queue is full.
static int queue_push(uint32_t *q, int exp) {
uint32_t r = ATOMIC_LOAD(q);
int mask = (1u << exp) - 1;
int head = r & mask;
int tail = r >> 16 & mask;
int next = (head + 1u) & mask;
if (r & 0x8000) { // avoid overflow on commit
ATOMIC_AND(q, ~0x8000);
}
return next == tail ? -1 : head;
}
// Commits and completes the push operation. Do this after storing into the
// array. This operation cannot fail.
static void queue_push_commit(uint32_t *q) {
ATOMIC_ADD(q, 1);
}
// Return the array index for the next value to be popped. The size of this
// array must be (1 << exp) elements. Read from this array index, then
// commit the pop. This element load need not be atomic. The value will be
// removed from the queue after the commit. Returns -1 if the queue is
// empty.
static int queue_pop(uint32_t *q, int exp) {
uint32_t r = ATOMIC_LOAD(q);
int mask = (1u << exp) - 1;
int head = r & mask;
int tail = r >> 16 & mask;
return head == tail ? -1 : tail;
}
// Commits and completes the pop operation. Do this after loading from the
// array. This operation cannot fail.
static void queue_pop_commit(uint32_t *q) {
ATOMIC_ADD(q, 0x10000);
}
// Like queue_pop() but for multiple-consumer queues. The element load must
// be atomic since it is concurrent with the producer's push, though it can
// use a relaxed memory order. The loaded value must not be used unless the
// commit is successful. Stores a temporary "save" to be used at commit.
static int queue_mpop(uint32_t *q, int exp, uint32_t *save) {
uint32_t r = *save = ATOMIC_LOAD(q);
int mask = (1u << exp) - 1;
int head = r & mask;
int tail = r >> 16 & mask;
return head == tail ? -1 : tail;
}
// Like queue_pop_commit() but for multiple-consumer queues. It may fail if
// another consumer pops concurrently, in which case the pop must be retried
// from the beginning.
static bool queue_mpop_commit(uint32_t *q, uint32_t save) {
return ATOMIC_CAS(q, &save, save + 0x10000);
}
// Spin-lock barrier for n threads, where n is a power of two.
// Initialize *barrier to zero.
static void barrier_waitn(int *barrier, int n) {
int v = BARRIER_INC(barrier);
if (v & (n - 1)) {
for (v &= n; (BARRIER_GET(barrier) & n) == v;) {
donothing;
}
}
}
// END: Chris Wellons's Public Domain GNU Atomics Library
////////////////////////////////////////////////////////////////////////////////
int barrier1;
int itsbegun;
int closingtime;
int barrier2;
int itsdone;
int Worker(void *id) {
int server, itsover, ready, yes = 1;
int server, yes = 1;
// announce to the main process this has spawned
kprintf(" #%.2ld", (intptr_t)id);
__atomic_add_fetch(&workers, 1, __ATOMIC_SEQ_CST);
// wait for all threads to spawn before we proceed
for (;;) {
__atomic_load(&barrier, &ready, __ATOMIC_SEQ_CST);
if (ready) break;
__builtin_ia32_pause();
}
kprintf(" %d", id);
barrier_waitn(&barrier1, THREADS);
itsbegun = true;
// load balance incoming connections for port 8080 across all threads
// hangup on any browser clients that lag for more than a few seconds
@ -131,7 +216,7 @@ int Worker(void *id) {
CHECK_EQ(0, listen(server, 10));
// connection loop
for (;;) {
while (!closingtime) {
struct tm tm;
int64_t unixts;
struct Url url;
@ -143,15 +228,8 @@ int Worker(void *id) {
char inbuf[1500], outbuf[512], *p, *q;
int clientip, client, inmsglen, outmsglen;
__atomic_load(&closingtime, &itsover, __ATOMIC_SEQ_CST);
if (itsover) break;
if (!IsLinux() &&
poll(&(struct pollfd){server, POLLIN}, 1, HEARTBEAT) < 1) {
continue;
}
// wait for client connection
if (poll(&(struct pollfd){server, POLLIN}, 1, HEARTBEAT) < 1) continue;
clientaddrsize = sizeof(clientaddr);
client = accept(server, &clientaddr, &clientaddrsize);
@ -163,7 +241,7 @@ int Worker(void *id) {
// inherited by the accepted sockets, but using them also has the
// side-effect that the listening socket fails with EAGAIN, every
// several seconds. we can use that to our advantage to check for
// the ctrl-c shutdown event; otherwise, we retry the accept call
// the ctrl-c shutdowne event; otherwise, we retry the accept call
continue;
}
@ -179,7 +257,7 @@ int Worker(void *id) {
#if LOGGING
// log the incoming http message
clientip = ntohl(clientaddr.sin_addr.s_addr);
kprintf("#%.2ld get some %d.%d.%d.%d:%d %#.*s\n", (intptr_t)id,
kprintf("#%.4x get some %d.%d.%d.%d:%d %#.*s\n", (intptr_t)id,
(clientip & 0xff000000) >> 030, (clientip & 0x00ff0000) >> 020,
(clientip & 0x0000ff00) >> 010, (clientip & 0x000000ff) >> 000,
ntohs(clientaddr.sin_port), msg.uri.b - msg.uri.a,
@ -239,8 +317,9 @@ int Worker(void *id) {
// inform the parent that this clone has finished
close(server);
kprintf(" #%.2ld", (intptr_t)id);
__atomic_sub_fetch(&workers, 1, __ATOMIC_SEQ_CST);
kprintf(" %d", id);
barrier_waitn(&barrier2, THREADS);
itsdone = true;
return 0;
}
@ -249,45 +328,20 @@ void OnCtrlC(int sig) {
}
int main(int argc, char *argv[]) {
/* ShowCrashReports(); */
int64_t loadtzdbearly;
int i, gotsome, haveleft, ready = 1;
ShowCrashReports();
kprintf("welcome to greenbean\n");
gmtime(&loadtzdbearly);
// spawn a bunch of threads
for (i = 0; i < THREADS; ++i) {
for (int i = 0; i < THREADS; ++i) {
void *stack = mmap(0, 65536, PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0);
clone(Worker, stack, 65536,
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(intptr_t)i, 0, 0, 0, 0);
}
// wait for all threads to spawn
for (;;) {
__atomic_load(&workers, &gotsome, __ATOMIC_SEQ_CST);
if (workers == THREADS) break;
__builtin_ia32_pause();
}
// all threads are spawned so unleash the barrier
kprintf("\ngreenbean is ready to go\n");
while (!ATOMIC_LOAD(&itsbegun)) usleep(HEARTBEAT * 1000);
sigaction(SIGINT, &(struct sigaction){.sa_handler = OnCtrlC}, 0);
__atomic_store(&barrier, &ready, __ATOMIC_SEQ_CST);
// main process does nothing until it's closing time
for (;;) {
__atomic_load(&workers, &haveleft, __ATOMIC_SEQ_CST);
if (!haveleft) break;
__builtin_ia32_pause();
usleep(HEARTBEAT * 1000);
if (closingtime) {
kprintf("\rgreenbean is shutting down...\n");
}
}
kprintf("\n");
kprintf("thank you for flying greenbean\n");
kprintf("\nit's begun\n");
while (!ATOMIC_LOAD(&itsdone)) usleep(HEARTBEAT * 1000);
kprintf("\nthank you for flying greenbean\n");
}

View file

@ -8,7 +8,7 @@
#define _KERNTRACE 0 /* not configurable w/ flag yet */
#define _POLLTRACE 0 /* not configurable w/ flag yet */
#define _DATATRACE 1 /* not configurable w/ flag yet */
#define _NTTRACE 0 /* not configurable w/ flag yet */
#define _NTTRACE 1 /* not configurable w/ flag yet */
#define STRACE_PROLOGUE "%rSYS %5P %'18T "

View file

@ -121,6 +121,16 @@ forceinline void MakeLongDoubleLongAgain(void) {
asm volatile("fldcw\t%0" : /* no outputs */ : "m"(x87cw));
}
// https://nullprogram.com/blog/2022/02/18/
static inline char16_t *MyCommandLine(void) {
void *cmd;
asm("mov\t%%gs:(0x60),%0\n"
"mov\t0x20(%0),%0\n"
"mov\t0x78(%0),%0\n"
: "=r"(cmd));
return cmd;
}
static inline size_t StrLen16(const char16_t *s) {
size_t n;
for (n = 0;; ++n) {
@ -271,7 +281,7 @@ __msabi textwindows int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
#if !IsTiny()
__wincrashearly = AddVectoredExceptionHandler(1, (void *)OnEarlyWinCrash);
#endif
cmdline = GetCommandLine();
cmdline = MyCommandLine();
#ifdef SYSDEBUG
/* sloppy flag-only check for early initialization */
if (__strstr16(cmdline, u"--strace")) ++__strace;

View file

@ -1158,18 +1158,18 @@ syscon ms MS_INVALIDATE 2 2 2 4 2 0
# statvfs() flags
#
# group name GNU/Systemd XNU's Not UNIX! FreeBSD OpenBSD NetBSD The New Technology Commentary
syscon statvfs ST_NOSUID 2 2 2 2 2 0 # unix consensus
syscon statvfs ST_RDONLY 1 1 1 1 1 0 # unix consensus
syscon statvfs ST_NOSUID 2 2 2 2 2 0 # unix consensus
syscon statvfs ST_NODEV 4 0 0 0 0x00000010 0
syscon statvfs ST_NOEXEC 8 0 0 0 4 0
syscon statvfs ST_SYNCHRONOUS 16 0 0 0 2 0
syscon statvfs ST_APPEND 0x0100 0 0 0 0 0
syscon statvfs ST_IMMUTABLE 0x0200 0 0 0 0 0
syscon statvfs ST_MANDLOCK 0x40 0 0 0 0 0
syscon statvfs ST_MANDLOCK 0x0040 0 0 0 0 0
syscon statvfs ST_NOATIME 0x0400 0 0 0x04000000 0 0
syscon statvfs ST_NODEV 4 0 0 0 0x00000010 0
syscon statvfs ST_NODIRATIME 0x0800 0 0 0 0 0
syscon statvfs ST_NOEXEC 8 0 0 0 4 0
syscon statvfs ST_WRITE 0x0080 0 0 0 0 0
syscon statvfs ST_RELATIME 0x1000 0 0 0 0x00020000 0
syscon statvfs ST_SYNCHRONOUS 0x10 0 0 0 2 0
syscon statvfs ST_WRITE 0x80 0 0 0 0 0
# sendfile() flags
#
@ -1442,7 +1442,7 @@ syscon termios IUTF8 0b0100000000000000 0b0100000000000000 0 0 0 0b010
#
# group name GNU/Systemd XNU's Not UNIX! FreeBSD OpenBSD NetBSD The New Technology Commentary
syscon termios OPOST 0b0000000000000001 0b000000000000000001 0b000000000000000001 0b0000000000000001 0b0000000000000001 0b0000000000000001 # termios.c_oflag&=~OPOST disables output processing magic, e.g. MULTICS newlines
syscon termios OLCUC 0b0000000000000010 0 0 0b0000000000100000 0 0b0000000000000010 # termios.c_oflag|=OLCUC maps a-z → A-Z output
syscon termios OLCUC 0b0000000000000010 0 0 0b0000000000100000 0 0b0000000000000010 # termios.c_oflag|=OLCUC maps a-z → A-Z output (SHOUTING)
syscon termios ONLCR 0b0000000000000100 0b000000000000000010 0b000000000000000010 0b0000000000000010 0b0000000000000010 0b0000000000000100 # termios.c_oflag|=ONLCR map \n → \r\n output (MULTICS newline) and requires OPOST
syscon termios OCRNL 0b0000000000001000 0b000000000000010000 0b000000000000010000 0b0000000000010000 0b0000000000010000 0b0000000000001000 # termios.c_oflag|=OCRNL maps \r → \n output
syscon termios ONOCR 0b0000000000010000 0b000000000000100000 0b000000000000100000 0b0000000001000000 0b0000000001000000 0b0000000000010000 # termios.c_oflag|=ONOCR maps \r → ∅ output iff column 0
@ -1478,14 +1478,14 @@ syscon termios FF1 0b1000000000000000 0b000100000000000000 0b0001000000000
# Teletypewriter Special Control Character Assignments
#
# group name GNU/Systemd XNU's Not UNIX! FreeBSD OpenBSD NetBSD The New Technology Commentary
syscon termios VMIN 6+1 16 16 16 16 6 # termios.c_cc[VMIN]=𝑥 in non-canonical mode can be set to 0 for non-blocking reads, 1 for single character raw mode reads, or higher to buffer
syscon termios VTIME 5+1 17 17 17 17 5 # termios.c_cc[VTIME]=𝑥 sets non-canonical read timeout to 𝑥×𝟷𝟶𝟶ms which is needed when entering escape sequences manually with the escape key
syscon termios NCCS 20 20 20 20 20 20 # ARRAYLEN(termios.c_cc); we schlep c_line into c_cc on linux
syscon termios VINTR 0+1 8 8 8 8 0 # termios.c_cc[VINTR]=𝑥
syscon termios VQUIT 1+1 9 9 9 9 1 # termios.c_cc[VQUIT]=𝑥
syscon termios VERASE 2+1 3 3 3 3 2 # termios.c_cc[VERASE]=𝑥
syscon termios VKILL 3+1 5 5 5 5 3 # termios.c_cc[VKILL]=𝑥
syscon termios VEOF 4+1 0 0 0 0 4 # termios.c_cc[VEOF]=𝑥
syscon termios VTIME 5+1 17 17 17 17 5 # termios.c_cc[VTIME]=𝑥 sets non-canonical read timeout to 𝑥×𝟷𝟶𝟶ms which is needed when entering escape sequences manually with the escape key
syscon termios VMIN 6+1 16 16 16 16 6 # termios.c_cc[VMIN]=𝑥 in non-canonical mode can be set to 0 for non-blocking reads, 1 for single character raw mode reads, or higher to buffer
syscon termios VSWTC 7+1 0 0 0 0 7 # termios.c_cc[VSWTC]=𝑥
syscon termios VSTART 8+1 12 12 12 12 8 # termios.c_cc[VSTART]=𝑥
syscon termios VSTOP 9+1 13 13 13 13 9 # termios.c_cc[VSTOP]=𝑥