cosmopolitan/tool/build/runitd.c

824 lines
26 KiB
C
Raw Normal View History

2020-06-15 14:18:57 +00:00
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
2020-06-15 14:18:57 +00:00
Copyright 2021 Justine Alexandra Roberts Tunney
2020-06-15 14:18:57 +00:00
2020-12-28 01:18:44 +00:00
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
2020-06-15 14:18:57 +00:00
2020-12-28 01:18:44 +00:00
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
2020-06-15 14:18:57 +00:00
*/
2023-09-10 15:12:43 +00:00
#include "libc/assert.h"
#include "libc/atomic.h"
2020-06-15 14:18:57 +00:00
#include "libc/calls/calls.h"
2023-09-10 15:12:43 +00:00
#include "libc/calls/struct/rusage.h"
#include "libc/calls/struct/sigaction.h"
2023-09-10 15:12:43 +00:00
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/stat.h"
2023-08-08 03:22:49 +00:00
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timeval.h"
#include "libc/dce.h"
2020-06-15 14:18:57 +00:00
#include "libc/errno.h"
#include "libc/fmt/conv.h"
2023-09-12 15:58:57 +00:00
#include "libc/fmt/itoa.h"
2023-06-18 12:39:31 +00:00
#include "libc/fmt/libgen.h"
2023-09-10 15:12:43 +00:00
#include "libc/intrin/kprintf.h"
#include "libc/log/appendresourcereport.internal.h"
2020-06-15 14:18:57 +00:00
#include "libc/log/check.h"
#include "libc/macros.internal.h"
#include "libc/mem/gc.h"
#include "libc/mem/leaks.h"
2023-08-20 09:13:02 +00:00
#include "libc/mem/mem.h"
2021-08-26 04:35:58 +00:00
#include "libc/nexgen32e/crc32.h"
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
#include "libc/proc/posix_spawn.h"
#include "libc/runtime/internal.h"
2020-06-15 14:18:57 +00:00
#include "libc/runtime/runtime.h"
2023-09-10 15:12:43 +00:00
#include "libc/runtime/syslib.internal.h"
#include "libc/serialize.h"
2020-06-15 14:18:57 +00:00
#include "libc/sock/sock.h"
Prove that Makefile is fully defined The whole repository is now buildable with GNU Make Landlock sandboxing. This proves that no Makefile targets exist which touch files other than their declared prerequisites. In order to do this, we had to: 1. Stop code morphing GCC output in package.com and instead run a newly introduced FIXUPOBJ.COM command after GCC invocations. 2. Disable all the crumby Python unit tests that do things like create files in the current directory, or rename() files between folders. This ended up being a lot of tests, but most of them are still ok. 3. Introduce an .UNSANDBOXED variable to GNU Make to disable Landlock. We currently only do this for things like `make tags`. 4. This change deletes some GNU Make code that was preventing the execve() optimization from working. This means it should no longer be necessary in most cases for command invocations to be indirected through the cocmd interpreter. 5. Missing dependencies had to be declared in certain places, in cases where they couldn't be automatically determined by MKDEPS.COM 6. The libcxx header situation has finally been tamed. One of the things that makes this difficult is MKDEPS.COM only wants to consider the first 64kb of a file, in order to go fast. But libcxx likes to have #include lines buried after huge documentation. 7. An .UNVEIL variable has been introduced to GNU Make just in case we ever wish to explicitly specify additional things that need to be whitelisted which aren't strictly prerequisites. This works in a manner similar to the recently introduced .EXTRA_PREREQS feature. There's now a new build/bootstrap/make.com prebuilt binary available. It should no longer be possible to write invalid Makefile code.
2022-08-06 10:51:50 +00:00
#include "libc/sock/struct/pollfd.h"
#include "libc/sock/struct/sockaddr.h"
2023-09-10 15:12:43 +00:00
#include "libc/stdio/append.h"
#include "libc/stdio/rand.h"
2020-06-15 14:18:57 +00:00
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/af.h"
2023-09-10 15:12:43 +00:00
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/clock.h"
2020-06-15 14:18:57 +00:00
#include "libc/sysv/consts/ex.h"
#include "libc/sysv/consts/exit.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/inaddr.h"
#include "libc/sysv/consts/ipproto.h"
#include "libc/sysv/consts/itimer.h"
2020-06-15 14:18:57 +00:00
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/poll.h"
2023-09-10 15:12:43 +00:00
#include "libc/sysv/consts/posix.h"
2020-06-15 14:18:57 +00:00
#include "libc/sysv/consts/sa.h"
2022-05-22 15:13:13 +00:00
#include "libc/sysv/consts/sig.h"
2020-06-15 14:18:57 +00:00
#include "libc/sysv/consts/so.h"
#include "libc/sysv/consts/sock.h"
#include "libc/sysv/consts/sol.h"
#include "libc/sysv/consts/w.h"
2023-09-10 15:12:43 +00:00
#include "libc/temp.h"
#include "libc/thread/thread.h"
#include "libc/thread/thread2.h"
#include "libc/time.h"
2020-06-15 14:18:57 +00:00
#include "libc/x/x.h"
2023-09-10 15:12:43 +00:00
#include "libc/x/xsigaction.h"
#include "net/http/escape.h"
2021-08-14 13:17:56 +00:00
#include "net/https/https.h"
#include "third_party/getopt/getopt.internal.h"
2023-09-12 15:58:57 +00:00
#include "third_party/mbedtls/debug.h"
2021-08-07 20:22:35 +00:00
#include "third_party/mbedtls/ssl.h"
2022-05-15 06:17:22 +00:00
#include "third_party/zlib/zlib.h"
2021-08-07 20:22:35 +00:00
#include "tool/build/lib/eztls.h"
2021-08-14 13:17:56 +00:00
#include "tool/build/lib/psk.h"
2020-06-15 14:18:57 +00:00
#include "tool/build/runit.h"
/**
* @fileoverview Remote test runner daemon.
* Delivers 10x latency improvement over SSH (100x if Debian defaults)
*
* Here's how it handles connections:
*
* 1. Receives atomically-written request header, comprised of:
*
* - 4 byte nbo magic = 0xFEEDABEEu
* - 1 byte command = kRunitExecute
* - 4 byte nbo name length in bytes, e.g. "test1"
* - 4 byte nbo executable file length in bytes
* - <name bytes> (no NUL terminator)
* - <file bytes> (it's binary data)
*
* 2. Runs program, after verifying it came from the IP that spawned
* this program via SSH. Be sure to only run this over a trusted
* physically-wired network. To use this software on untrustworthy
* networks, wrap it with stunnel and use your own CA.
*
* 3. Sends stdout/stderr fragments, potentially multiple times:
*
* - 4 byte nbo magic = 0xFEEDABEEu
* - 1 byte command = kRunitStdout/Stderr
* - 4 byte nbo byte length
* - <chunk bytes>
*
* 4. Sends process exit code:
*
* - 4 byte nbo magic = 0xFEEDABEEu
* - 1 byte command = kRunitExit
* - 1 byte exit status
*/
2023-08-08 03:22:49 +00:00
#define DEATH_CLOCK_SECONDS 300
Add SSL to redbean Your redbean can now interoperate with clients that require TLS crypto. This is accomplished using a protocol polyglot that lets us distinguish between HTTP and HTTPS regardless of the port number. Certificates will be generated automatically, if none are supplied by the user. Footprint increases by only a few hundred kb so redbean in MODY=tiny is now 1.0mb - Add lseek() polyfills for ZIP executable - Automatically polyfill /tmp/FOO paths on NT - Fix readdir() / ftw() / nftw() bugs on Windows - Introduce -B flag for slower SSL that's stronger - Remove mbedtls features Cosmopolitan doesn't need - Have base64 decoder support the uri-safe alternative - Remove Truncated HMAC because it's forbidden by the IETF - Add all the mbedtls test suites and make them go 3x faster - Support opendir() / readdir() / closedir() on ZIP executable - Use Everest for ECDHE-ECDSA because it's so good it's so good - Add tinier implementation of sha1 since it's not worth the rom - Add chi-square monte-carlo mean correlation tests for getrandom() - Source entropy on Windows from the proper interface everyone uses We're continuing to outperform NGINX and other servers on raw message throughput. Using SSL means that instead of 1,000,000 qps you can get around 300,000 qps. However redbean isn't as fast as NGINX yet at SSL handshakes, since redbean can do 2,627 per second and NGINX does 4.3k Right now, the SSL UX story works best if you give your redbean a key signing key since that can be easily generated by openssl using a one liner then redbean will do all the things that are impossibly hard to do like signing ecdsa and rsa certificates that'll work in chrome. We should integrate the let's encrypt acme protocol in the future. Live Demo: https://redbean.justine.lol/ Root Cert: https://redbean.justine.lol/redbean1.crt
2021-06-24 19:31:26 +00:00
2020-06-15 14:18:57 +00:00
#define kLogFile "o/runitd.log"
#define kLogMaxBytes (2 * 1000 * 1000)
2023-09-10 15:12:43 +00:00
#define LOG_LEVEL_WARN 0
#define LOG_LEVEL_INFO 1
2023-09-12 15:58:57 +00:00
#define LOG_LEVEL_VERB 2
2023-09-10 15:12:43 +00:00
#define LOG_LEVEL_DEBU 3
#define DEBUF(FMT, ...) LOGF(DEBU, FMT, ##__VA_ARGS__)
#define VERBF(FMT, ...) LOGF(VERB, FMT, ##__VA_ARGS__)
#define INFOF(FMT, ...) LOGF(INFO, FMT, ##__VA_ARGS__)
#define WARNF(FMT, ...) LOGF(WARN, FMT, ##__VA_ARGS__)
#define LOGF(LVL, FMT, ...) \
do { \
if (g_log_level >= LOG_LEVEL_##LVL) { \
kprintf("%r" #LVL " %6P %'18T %s:%d " FMT "\n", __FILE__, __LINE__, \
##__VA_ARGS__); \
} \
} while (0)
struct Client {
int fd;
int pid;
int pipe[2];
pthread_t th;
uint32_t addrsize;
struct sockaddr_in addr;
bool once;
int zstatus;
z_stream zs;
struct {
size_t off;
size_t len;
size_t cap;
char *data;
} rbuf;
char *output;
2023-09-12 15:58:57 +00:00
char tmpexepath[128];
2023-09-10 15:12:43 +00:00
char buf[32768];
};
char *g_psk;
int g_log_level;
bool use_ftrace;
bool use_strace;
2023-09-10 15:12:43 +00:00
char g_hostname[256];
int g_bogusfd, g_servfd;
atomic_bool g_interrupted;
2020-06-15 14:18:57 +00:00
struct sockaddr_in g_servaddr;
bool g_daemonize, g_sendready;
2020-06-15 14:18:57 +00:00
void OnInterrupt(int sig) {
g_interrupted = true;
}
2023-09-10 15:12:43 +00:00
void Close(int *fd) {
if (*fd > 0) {
close(*fd);
*fd = -1; // poll ignores -1
2020-06-15 14:18:57 +00:00
}
}
wontreturn void ShowUsage(FILE *f, int rc) {
2020-06-15 14:18:57 +00:00
fprintf(f, "%s: %s %s\n", "Usage", program_invocation_name,
"[-d] [-r] [-l LISTENIP] [-p PORT] [-t TIMEOUTMS]");
exit(rc);
}
2023-09-10 15:12:43 +00:00
char *DescribeAddress(struct sockaddr_in *addr) {
static _Thread_local char res[64];
char ip4buf[64];
sprintf(res, "%s:%hu",
inet_ntop(addr->sin_family, &addr->sin_addr.s_addr, ip4buf,
sizeof(ip4buf)),
ntohs(addr->sin_port));
return res;
}
2020-06-15 14:18:57 +00:00
void GetOpts(int argc, char *argv[]) {
int opt;
g_servaddr.sin_family = AF_INET;
g_servaddr.sin_port = htons(RUNITD_PORT);
g_servaddr.sin_addr.s_addr = INADDR_ANY;
2023-09-12 15:58:57 +00:00
while ((opt = getopt(argc, argv, "fqhvVsdrl:p:t:w:")) != -1) {
2020-06-15 14:18:57 +00:00
switch (opt) {
case 'f':
use_ftrace = true;
break;
Improve ZIP filesystem and change its prefix The ZIP filesystem has a breaking change. You now need to use /zip/ to open() / opendir() / etc. assets within the ZIP structure of your APE binary, instead of the previous convention of using zip: or zip! URIs. This is needed because Python likes to use absolute paths, and having ZIP paths encoded like URIs simply broke too many things. Many more system calls have been updated to be able to operate on ZIP files and file descriptors. In particular fcntl() and ioctl() since Python would do things like ask if a ZIP file is a terminal and get confused when the old implementation mistakenly said yes, because the fastest way to guarantee native file descriptors is to dup(2). This change also improves the async signal safety of zipos and ensures it doesn't maintain any open file descriptors beyond that which the user has opened. This change makes a lot of progress towards adding magic numbers that are specific to platforms other than Linux. The philosophy here is that, if you use an operating system like FreeBSD, then you should be able to take advantage of FreeBSD exclusive features, even if we don't polyfill them on other platforms. For example, you can now open() a file with the O_VERIFY flag. If your program runs on other platforms, then Cosmo will automatically set O_VERIFY to zero. This lets you safely use it without the need for #ifdef or ifstatements which detract from readability. One of the blindspots of the ASAN memory hardening we use to offer Rust like assurances has always been that memory passed to the kernel via system calls (e.g. writev) can't be checked automatically since the kernel wasn't built with MODE=asan. This change makes more progress ensuring that each system call will verify the soundness of memory before it's passed to the kernel. The code for doing these checks is fast, particularly for buffers, where it can verify 64 bytes a cycle. - Correct O_LOOP definition on NT - Introduce program_executable_name - Add ASAN guards to more system calls - Improve termios compatibility with BSDs - Fix bug in Windows auxiliary value encoding - Add BSD and XNU specific errnos and open flags - Add check to ensure build doesn't talk to internet
2021-08-22 08:04:18 +00:00
case 's':
use_strace = true;
break;
case 'q':
2023-09-10 15:12:43 +00:00
--g_log_level;
Improve ZIP filesystem and change its prefix The ZIP filesystem has a breaking change. You now need to use /zip/ to open() / opendir() / etc. assets within the ZIP structure of your APE binary, instead of the previous convention of using zip: or zip! URIs. This is needed because Python likes to use absolute paths, and having ZIP paths encoded like URIs simply broke too many things. Many more system calls have been updated to be able to operate on ZIP files and file descriptors. In particular fcntl() and ioctl() since Python would do things like ask if a ZIP file is a terminal and get confused when the old implementation mistakenly said yes, because the fastest way to guarantee native file descriptors is to dup(2). This change also improves the async signal safety of zipos and ensures it doesn't maintain any open file descriptors beyond that which the user has opened. This change makes a lot of progress towards adding magic numbers that are specific to platforms other than Linux. The philosophy here is that, if you use an operating system like FreeBSD, then you should be able to take advantage of FreeBSD exclusive features, even if we don't polyfill them on other platforms. For example, you can now open() a file with the O_VERIFY flag. If your program runs on other platforms, then Cosmo will automatically set O_VERIFY to zero. This lets you safely use it without the need for #ifdef or ifstatements which detract from readability. One of the blindspots of the ASAN memory hardening we use to offer Rust like assurances has always been that memory passed to the kernel via system calls (e.g. writev) can't be checked automatically since the kernel wasn't built with MODE=asan. This change makes more progress ensuring that each system call will verify the soundness of memory before it's passed to the kernel. The code for doing these checks is fast, particularly for buffers, where it can verify 64 bytes a cycle. - Correct O_LOOP definition on NT - Introduce program_executable_name - Add ASAN guards to more system calls - Improve termios compatibility with BSDs - Fix bug in Windows auxiliary value encoding - Add BSD and XNU specific errnos and open flags - Add check to ensure build doesn't talk to internet
2021-08-22 08:04:18 +00:00
break;
case 'v':
2023-09-10 15:12:43 +00:00
++g_log_level;
Improve ZIP filesystem and change its prefix The ZIP filesystem has a breaking change. You now need to use /zip/ to open() / opendir() / etc. assets within the ZIP structure of your APE binary, instead of the previous convention of using zip: or zip! URIs. This is needed because Python likes to use absolute paths, and having ZIP paths encoded like URIs simply broke too many things. Many more system calls have been updated to be able to operate on ZIP files and file descriptors. In particular fcntl() and ioctl() since Python would do things like ask if a ZIP file is a terminal and get confused when the old implementation mistakenly said yes, because the fastest way to guarantee native file descriptors is to dup(2). This change also improves the async signal safety of zipos and ensures it doesn't maintain any open file descriptors beyond that which the user has opened. This change makes a lot of progress towards adding magic numbers that are specific to platforms other than Linux. The philosophy here is that, if you use an operating system like FreeBSD, then you should be able to take advantage of FreeBSD exclusive features, even if we don't polyfill them on other platforms. For example, you can now open() a file with the O_VERIFY flag. If your program runs on other platforms, then Cosmo will automatically set O_VERIFY to zero. This lets you safely use it without the need for #ifdef or ifstatements which detract from readability. One of the blindspots of the ASAN memory hardening we use to offer Rust like assurances has always been that memory passed to the kernel via system calls (e.g. writev) can't be checked automatically since the kernel wasn't built with MODE=asan. This change makes more progress ensuring that each system call will verify the soundness of memory before it's passed to the kernel. The code for doing these checks is fast, particularly for buffers, where it can verify 64 bytes a cycle. - Correct O_LOOP definition on NT - Introduce program_executable_name - Add ASAN guards to more system calls - Improve termios compatibility with BSDs - Fix bug in Windows auxiliary value encoding - Add BSD and XNU specific errnos and open flags - Add check to ensure build doesn't talk to internet
2021-08-22 08:04:18 +00:00
break;
2023-09-12 15:58:57 +00:00
case 'V':
++mbedtls_debug_threshold;
break;
2020-06-15 14:18:57 +00:00
case 'd':
g_daemonize = true;
break;
case 'r':
g_sendready = true;
break;
case 't':
break;
case 'p':
Make improvements - We now serialize the file descriptor table when spawning / executing processes on Windows. This means you can now inherit more stuff than just standard i/o. It's needed by bash, which duplicates the console to file descriptor #255. We also now do a better job serializing the environment variables, so you're less likely to encounter E2BIG when using your bash shell. We also no longer coerce environ to uppercase - execve() on Windows now remotely controls its parent process to make them spawn a replacement for itself. Then it'll be able to terminate immediately once the spawn succeeds, without having to linger around for the lifetime as a shell process for proxying the exit code. When process worker thread running in the parent sees the child die, it's given a handle to the new child, to replace it in the process table. - execve() and posix_spawn() on Windows will now provide CreateProcess an explicit handle list. This allows us to remove handle locks which enables better fork/spawn concurrency, with seriously correct thread safety. Other codebases like Go use the same technique. On the other hand fork() still favors the conventional WIN32 inheritence approach which can be a little bit messy, but is *controlled* by guaranteeing perfectly clean slates at both the spawning and execution boundaries - sigset_t is now 64 bits. Having it be 128 bits was a mistake because there's no reason to use that and it's only supported by FreeBSD. By using the system word size, signal mask manipulation on Windows goes very fast. Furthermore @asyncsignalsafe funcs have been rewritten on Windows to take advantage of signal masking, now that it's much more pleasant to use. - All the overlapped i/o code on Windows has been rewritten for pretty good signal and cancelation safety. We're now able to ensure overlap data structures are cleaned up so long as you don't longjmp() out of out of a signal handler that interrupted an i/o operation. Latencies are also improved thanks to the removal of lots of "busy wait" code. Waits should be optimal for everything except poll(), which shall be the last and final demon we slay in the win32 i/o horror show. - getrusage() on Windows is now able to report RUSAGE_CHILDREN as well as RUSAGE_SELF, thanks to aggregation in the process manager thread.
2023-10-08 12:36:18 +00:00
g_servaddr.sin_port = htons(atoi(optarg));
2020-06-15 14:18:57 +00:00
break;
case 'l':
2023-09-10 15:12:43 +00:00
inet_pton(AF_INET, optarg, &g_servaddr.sin_addr);
2020-06-15 14:18:57 +00:00
break;
case 'h':
ShowUsage(stdout, EXIT_SUCCESS);
default:
ShowUsage(stderr, EX_USAGE);
}
}
}
void StartTcpServer(void) {
int yes = true;
uint32_t asize;
2023-09-10 15:12:43 +00:00
g_servfd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
if (g_servfd == -1) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
fprintf(stderr, "%s: socket failed: %s\n", program_invocation_short_name,
strerror(errno));
2023-09-10 15:12:43 +00:00
exit(1);
}
struct timeval timeo = {DEATH_CLOCK_SECONDS / 10};
setsockopt(g_servfd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
setsockopt(g_servfd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
2023-09-10 15:12:43 +00:00
setsockopt(g_servfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
if (bind(g_servfd, (struct sockaddr *)&g_servaddr, sizeof(g_servaddr)) ==
-1) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
fprintf(stderr, "%s: bind failed: %s\n", program_invocation_short_name,
strerror(errno));
2023-09-10 15:12:43 +00:00
exit(1);
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
unassert(!listen(g_servfd, 10));
2020-06-15 14:18:57 +00:00
asize = sizeof(g_servaddr);
2023-09-10 15:12:43 +00:00
unassert(!getsockname(g_servfd, (struct sockaddr *)&g_servaddr, &asize));
INFOF("listening on tcp:%s", DescribeAddress(&g_servaddr));
2020-06-15 14:18:57 +00:00
if (g_sendready) {
printf("ready %hu\n", ntohs(g_servaddr.sin_port));
close(1);
dup2(g_bogusfd, 1);
2020-06-15 14:18:57 +00:00
}
}
2021-08-07 20:22:35 +00:00
void SendExitMessage(int rc) {
2023-09-10 15:12:43 +00:00
EzSanity();
int res;
2020-06-15 14:18:57 +00:00
unsigned char msg[4 + 1 + 1];
2023-09-10 15:12:43 +00:00
DEBUF("SendExitMessage");
Make numerous improvements - Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 05:58:51 +00:00
msg[0 + 0] = (RUNITD_MAGIC & 0xff000000) >> 030;
msg[0 + 1] = (RUNITD_MAGIC & 0x00ff0000) >> 020;
msg[0 + 2] = (RUNITD_MAGIC & 0x0000ff00) >> 010;
msg[0 + 3] = (RUNITD_MAGIC & 0x000000ff) >> 000;
2020-06-15 14:18:57 +00:00
msg[4] = kRunitExit;
Make numerous improvements - Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 05:58:51 +00:00
msg[5] = rc;
2023-09-10 15:12:43 +00:00
DEBUF("mbedtls_ssl_write");
if (sizeof(msg) != (res = mbedtls_ssl_write(&ezssl, msg, sizeof(msg)))) {
EzTlsDie("SendExitMessage mbedtls_ssl_write failed", res);
}
if ((res = EzTlsFlush(&ezbio, 0, 0))) {
EzTlsDie("SendExitMessage EzTlsFlush failed", res);
}
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
void SendOutputFragmentMessage(enum RunitCommand kind, char *buf, size_t size) {
EzSanity();
2020-06-15 14:18:57 +00:00
ssize_t rc;
unsigned char msg[4 + 1 + 4];
Make numerous improvements - Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 05:58:51 +00:00
msg[0 + 0] = (RUNITD_MAGIC & 0xff000000) >> 030;
msg[0 + 1] = (RUNITD_MAGIC & 0x00ff0000) >> 020;
msg[0 + 2] = (RUNITD_MAGIC & 0x0000ff00) >> 010;
msg[0 + 3] = (RUNITD_MAGIC & 0x000000ff) >> 000;
2020-06-15 14:18:57 +00:00
msg[4 + 0] = kind;
Make numerous improvements - Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
2021-09-28 05:58:51 +00:00
msg[5 + 0] = (size & 0xff000000) >> 030;
msg[5 + 1] = (size & 0x00ff0000) >> 020;
msg[5 + 2] = (size & 0x0000ff00) >> 010;
msg[5 + 3] = (size & 0x000000ff) >> 000;
2023-09-10 15:12:43 +00:00
DEBUF("mbedtls_ssl_write");
if (sizeof(msg) != (rc = mbedtls_ssl_write(&ezssl, msg, sizeof(msg)))) {
EzTlsDie("SendOutputFragmentMessage mbedtls_ssl_write failed", rc);
}
2020-06-15 14:18:57 +00:00
while (size) {
2023-09-10 15:12:43 +00:00
if ((rc = mbedtls_ssl_write(&ezssl, buf, size)) <= 0) {
EzTlsDie("SendOutputFragmentMessage mbedtls_ssl_write #2 failed", rc);
}
size -= rc;
buf += rc;
}
if ((rc = EzTlsFlush(&ezbio, 0, 0))) {
EzTlsDie("SendOutputFragmentMessage EzTlsFlush failed", rc);
2020-06-15 14:18:57 +00:00
}
}
2023-09-10 15:12:43 +00:00
void Recv(struct Client *client, void *output, size_t outputsize) {
EzSanity();
ssize_t chunk, received, totalgot;
2023-09-10 15:12:43 +00:00
if (!client->once) {
unassert(Z_OK == inflateInit(&client->zs));
client->once = true;
2022-05-15 06:17:22 +00:00
}
totalgot = 0;
2022-05-15 06:17:22 +00:00
for (;;) {
2023-09-10 15:12:43 +00:00
if (client->rbuf.len >= outputsize) {
memcpy(output, client->rbuf.data + client->rbuf.off, outputsize);
client->rbuf.len -= outputsize;
client->rbuf.off += outputsize;
2022-05-15 06:17:22 +00:00
// trim dymanic buffer once it empties
2023-09-10 15:12:43 +00:00
if (!client->rbuf.len) {
client->rbuf.off = 0;
client->rbuf.cap = 4096;
client->rbuf.data = realloc(client->rbuf.data, client->rbuf.cap);
2022-05-15 06:17:22 +00:00
}
return;
}
2023-09-10 15:12:43 +00:00
if (client->zstatus == Z_STREAM_END) {
WARNF("recv zlib unexpected eof");
pthread_exit(0);
2022-05-15 06:17:22 +00:00
}
// get another fixed-size data packet from network
// pass along error conditions to caller
// pass along eof condition to zlib
2023-09-10 15:12:43 +00:00
received = mbedtls_ssl_read(&ezssl, client->buf, sizeof(client->buf));
if (!received) {
2023-09-10 15:12:43 +00:00
EzTlsDie("got unexpected eof", received);
}
if (received < 0) {
2023-09-10 15:12:43 +00:00
EzTlsDie("read failed", received);
}
totalgot += received;
2022-05-15 06:17:22 +00:00
// decompress packet completely
// into a dynamical size buffer
2023-09-10 15:12:43 +00:00
client->zs.avail_in = received;
client->zs.next_in = (unsigned char *)client->buf;
unassert(Z_OK == client->zstatus);
2021-08-26 04:35:58 +00:00
do {
2022-05-15 06:17:22 +00:00
// make sure we have a reasonable capacity for zlib output
2023-09-10 15:12:43 +00:00
if (client->rbuf.cap - (client->rbuf.off + client->rbuf.len) <
sizeof(client->buf)) {
client->rbuf.cap += sizeof(client->buf);
client->rbuf.data = realloc(client->rbuf.data, client->rbuf.cap);
2022-05-15 06:17:22 +00:00
}
// inflate packet, which naturally can be much larger
// permit zlib no delay flushes that come from sender
2023-09-10 15:12:43 +00:00
client->zs.next_out = (unsigned char *)client->rbuf.data +
(client->rbuf.off + client->rbuf.len);
client->zs.avail_out = chunk =
client->rbuf.cap - (client->rbuf.off + client->rbuf.len);
client->zstatus = inflate(&client->zs, Z_SYNC_FLUSH);
unassert(Z_STREAM_ERROR != client->zstatus);
switch (client->zstatus) {
2022-05-15 06:17:22 +00:00
case Z_NEED_DICT:
WARNF("tls recv Z_NEED_DICT %ld total %ld", received, totalgot);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
2022-05-15 06:17:22 +00:00
case Z_DATA_ERROR:
WARNF("tls recv Z_DATA_ERROR %ld total %ld", received, totalgot);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
2022-05-15 06:17:22 +00:00
case Z_MEM_ERROR:
WARNF("tls recv Z_MEM_ERROR %ld total %ld", received, totalgot);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
2022-05-15 06:17:22 +00:00
case Z_BUF_ERROR:
2023-09-10 15:12:43 +00:00
client->zstatus = Z_OK; // harmless? nothing for inflate to do
break; // it probably just our wraparound eof
2022-05-15 06:17:22 +00:00
default:
2023-09-10 15:12:43 +00:00
client->rbuf.len += chunk - client->zs.avail_out;
2022-05-15 06:17:22 +00:00
break;
}
2023-09-10 15:12:43 +00:00
} while (!client->zs.avail_out);
2021-08-26 04:35:58 +00:00
}
}
void SendProgramOutput(struct Client *client) {
2023-09-10 15:12:43 +00:00
if (client->output) {
SendOutputFragmentMessage(kRunitStderr, client->output,
appendz(client->output).i);
}
}
void PrintProgramOutput(struct Client *client) {
if (client->output) {
char *p = client->output;
size_t z = appendz(p).i;
if ((p = IndentLines(p, z, &z, 2))) {
fwrite(p, 1, z, stderr);
free(p);
}
}
}
void FreeClient(struct Client *client) {
DEBUF("FreeClient");
Close(&client->pipe[1]);
Close(&client->pipe[0]);
2023-09-10 15:12:43 +00:00
if (client->pid) {
kill(client->pid, SIGHUP);
waitpid(client->pid, 0, 0);
}
Close(&client->fd);
2023-09-12 15:58:57 +00:00
if (*client->tmpexepath) {
unlink(client->tmpexepath);
2023-09-10 15:12:43 +00:00
}
if (client->once) {
inflateEnd(&client->zs);
}
EzDestroy();
free(client->rbuf.data);
free(client->output);
free(client);
VERBF("---------------");
}
void *ClientWorker(void *arg) {
2021-08-26 04:35:58 +00:00
uint32_t crc;
2023-08-08 03:22:49 +00:00
sigset_t sigmask;
2023-09-10 15:12:43 +00:00
int events, wstatus;
struct Client *client = arg;
uint32_t namesize, filesize;
2023-09-12 15:58:57 +00:00
char *addrstr, *origname;
2021-08-26 04:35:58 +00:00
unsigned char msg[4 + 1 + 4 + 4 + 4];
2023-09-10 15:12:43 +00:00
SetupPresharedKeySsl(MBEDTLS_SSL_IS_SERVER, g_psk);
defer(FreeClient, client);
// read request to run program
EzFd(client->fd);
DEBUF("EzHandshake");
EzHandshake();
2023-09-10 15:12:43 +00:00
addrstr = DescribeAddress(&client->addr);
DEBUF("%s %s %s", DescribeAddress(&g_servaddr), "accepted", addrstr);
2022-05-15 06:17:22 +00:00
2023-09-10 15:12:43 +00:00
// get the executable
Recv(client, msg, sizeof(msg));
if (READ32BE(msg) != RUNITD_MAGIC) {
WARNF("%s magic mismatch!", addrstr);
pthread_exit(0);
}
if (msg[4] != kRunitExecute) {
WARNF("%s unknown command!", addrstr);
pthread_exit(0);
}
2021-08-26 04:35:58 +00:00
namesize = READ32BE(msg + 5);
filesize = READ32BE(msg + 9);
crc = READ32BE(msg + 13);
2023-09-12 15:58:57 +00:00
origname = gc(calloc(1, namesize + 1));
Recv(client, origname, namesize);
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
VERBF("%s sent %#s (%'u bytes @ %#s)", addrstr, origname, filesize,
2023-09-12 15:58:57 +00:00
client->tmpexepath);
char *exedata = gc(malloc(filesize));
Recv(client, exedata, filesize);
if (crc32_z(0, exedata, filesize) != crc) {
WARNF("%s crc mismatch! %#s", addrstr, origname);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
}
// create the executable file
// if another thread vforks while we're writing it then a race
// condition can happen, where etxtbsy is raised by our execve
// we're using o_cloexec so it's guaranteed to fix itself fast
// thus we use an optimistic approach to avoid expensive locks
sprintf(client->tmpexepath, "o/%s.XXXXXX",
basename(stripext(gc(strdup(origname)))));
2024-03-31 02:03:30 +00:00
int exefd = openatemp(AT_FDCWD, client->tmpexepath, 0, O_CLOEXEC, 0700);
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
if (exefd == -1) {
WARNF("%s failed to open temporary file %#s due to %m", addrstr,
client->tmpexepath);
pthread_exit(0);
}
if (ftruncate(exefd, filesize)) {
WARNF("%s failed to write %#s due to %m", addrstr, origname);
close(exefd);
pthread_exit(0);
}
2023-09-12 15:58:57 +00:00
if (write(exefd, exedata, filesize) != filesize) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
WARNF("%s failed to write %#s due to %m", addrstr, origname);
2023-09-10 15:12:43 +00:00
close(exefd);
pthread_exit(0);
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
if (close(exefd)) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
WARNF("%s failed to close %#s due to %m", addrstr, origname);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
}
// do the args
int i = 0;
char *args[8] = {0};
2023-09-12 15:58:57 +00:00
args[i++] = client->tmpexepath;
if (use_strace)
args[i++] = "--strace";
if (use_ftrace)
args[i++] = "--ftrace";
2020-06-15 14:18:57 +00:00
2023-09-10 15:12:43 +00:00
// run program, tee'ing stderr to both log and client
2023-09-12 15:58:57 +00:00
DEBUF("spawning %s", client->tmpexepath);
2023-08-08 03:22:49 +00:00
sigemptyset(&sigmask);
sigaddset(&sigmask, SIGINT);
sigaddset(&sigmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &sigmask, 0);
2023-09-10 15:12:43 +00:00
// spawn the program
int etxtbsy_tries = 0;
RetryOnEtxtbsyRaceCondition:
Close(&client->pipe[1]);
Close(&client->pipe[0]);
2023-09-10 15:12:43 +00:00
if (etxtbsy_tries++) {
if (etxtbsy_tries == 24) { // ~30 seconds
WARNF("%s failed to spawn on %s due because either (1) the ETXTBSY race "
"condition kept happening or (2) the program in question actually "
"is crashing with SIGVTALRM, without printing anything to out/err!",
2023-09-12 15:58:57 +00:00
origname, g_hostname);
2023-09-10 15:12:43 +00:00
pthread_exit(0);
}
2023-09-10 15:12:43 +00:00
if (usleep(1u << etxtbsy_tries)) {
INFOF("interrupted exponential spawn backoff");
pthread_exit(0);
}
}
errno_t err;
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
struct timespec started;
2023-09-10 15:12:43 +00:00
posix_spawnattr_t spawnattr;
posix_spawn_file_actions_t spawnfila;
sigemptyset(&sigmask);
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
started = timespec_real();
2023-09-10 15:12:43 +00:00
pipe2(client->pipe, O_CLOEXEC);
posix_spawnattr_init(&spawnattr);
posix_spawnattr_setflags(&spawnattr,
POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETPGROUP);
2023-09-10 15:12:43 +00:00
posix_spawnattr_setsigmask(&spawnattr, &sigmask);
posix_spawn_file_actions_init(&spawnfila);
posix_spawn_file_actions_adddup2(&spawnfila, g_bogusfd, 0);
posix_spawn_file_actions_adddup2(&spawnfila, client->pipe[1], 1);
posix_spawn_file_actions_adddup2(&spawnfila, client->pipe[1], 2);
2023-09-12 15:58:57 +00:00
err = posix_spawn(&client->pid, client->tmpexepath, &spawnfila, &spawnattr,
args, environ);
2023-09-10 15:12:43 +00:00
if (err) {
if (err == ETXTBSY) {
goto RetryOnEtxtbsyRaceCondition;
}
2023-09-12 15:58:57 +00:00
WARNF("%s failed to spawn on %s due to %s", client->tmpexepath, g_hostname,
2023-09-10 15:12:43 +00:00
strerror(err));
pthread_exit(0);
}
posix_spawn_file_actions_destroy(&spawnfila);
posix_spawnattr_destroy(&spawnattr);
Close(&client->pipe[1]);
2023-09-12 15:58:57 +00:00
DEBUF("communicating %s[%d]", origname, client->pid);
2023-09-10 15:12:43 +00:00
struct timespec deadline =
2023-08-08 03:22:49 +00:00
timespec_add(timespec_real(), timespec_fromseconds(DEATH_CLOCK_SECONDS));
Add SSL to redbean Your redbean can now interoperate with clients that require TLS crypto. This is accomplished using a protocol polyglot that lets us distinguish between HTTP and HTTPS regardless of the port number. Certificates will be generated automatically, if none are supplied by the user. Footprint increases by only a few hundred kb so redbean in MODY=tiny is now 1.0mb - Add lseek() polyfills for ZIP executable - Automatically polyfill /tmp/FOO paths on NT - Fix readdir() / ftw() / nftw() bugs on Windows - Introduce -B flag for slower SSL that's stronger - Remove mbedtls features Cosmopolitan doesn't need - Have base64 decoder support the uri-safe alternative - Remove Truncated HMAC because it's forbidden by the IETF - Add all the mbedtls test suites and make them go 3x faster - Support opendir() / readdir() / closedir() on ZIP executable - Use Everest for ECDHE-ECDSA because it's so good it's so good - Add tinier implementation of sha1 since it's not worth the rom - Add chi-square monte-carlo mean correlation tests for getrandom() - Source entropy on Windows from the proper interface everyone uses We're continuing to outperform NGINX and other servers on raw message throughput. Using SSL means that instead of 1,000,000 qps you can get around 300,000 qps. However redbean isn't as fast as NGINX yet at SSL handshakes, since redbean can do 2,627 per second and NGINX does 4.3k Right now, the SSL UX story works best if you give your redbean a key signing key since that can be easily generated by openssl using a one liner then redbean will do all the things that are impossibly hard to do like signing ecdsa and rsa certificates that'll work in chrome. We should integrate the let's encrypt acme protocol in the future. Live Demo: https://redbean.justine.lol/ Root Cert: https://redbean.justine.lol/redbean1.crt
2021-06-24 19:31:26 +00:00
for (;;) {
2023-09-10 15:12:43 +00:00
if (g_interrupted) {
WARNF("killing %d %s and hanging up %d due to interrupt", client->fd,
2023-09-12 15:58:57 +00:00
origname, client->pid);
2023-09-10 15:12:43 +00:00
HangupClientAndTerminateJob:
SendProgramOutput(client);
2023-09-10 15:12:43 +00:00
mbedtls_ssl_close_notify(&ezssl);
2023-08-08 03:22:49 +00:00
TerminateJob:
2023-09-10 15:12:43 +00:00
PrintProgramOutput(client);
pthread_exit(0);
}
struct timespec now = timespec_real();
if (timespec_cmp(now, deadline) >= 0) {
2023-09-12 15:58:57 +00:00
WARNF("killing %s (pid %d) which timed out after %d seconds", origname,
2023-09-10 15:12:43 +00:00
client->pid, DEATH_CLOCK_SECONDS);
goto HangupClientAndTerminateJob;
}
struct pollfd fds[2];
2023-09-10 15:12:43 +00:00
fds[0].fd = client->fd;
fds[0].events = POLLIN;
2023-09-10 15:12:43 +00:00
fds[1].fd = client->pipe[0];
fds[1].events = POLLIN;
2023-09-10 15:12:43 +00:00
events = poll(fds, ARRAYLEN(fds),
timespec_tomillis(timespec_sub(deadline, now)));
if (events == -1) {
if (errno == EINTR) {
INFOF("poll interrupted");
continue;
} else {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
WARNF("killing %d %s and hanging up %d because poll failed with %m",
client->fd, origname, client->pid);
2023-09-10 15:12:43 +00:00
goto HangupClientAndTerminateJob;
}
}
2023-08-08 03:22:49 +00:00
if (events) {
if (fds[0].revents) {
int received;
char buf[512];
received = mbedtls_ssl_read(&ezssl, buf, sizeof(buf));
if (!received) {
2023-09-12 15:58:57 +00:00
WARNF("%s client disconnected so killing worker %d", origname,
2023-09-10 15:12:43 +00:00
client->pid);
goto TerminateJob;
2023-08-08 03:22:49 +00:00
}
if (received > 0) {
WARNF("%s client sent %d unexpected bytes", origname, received);
continue;
}
2023-09-10 15:12:43 +00:00
if (received == MBEDTLS_ERR_SSL_WANT_READ) { // EAGAIN SO_RCVTIMEO
2023-09-12 15:58:57 +00:00
WARNF("%s (pid %d) is taking a really long time", origname,
2023-09-10 15:12:43 +00:00
client->pid);
continue;
}
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
if (received == MBEDTLS_ERR_SSL_CANCELED) { // EAGAIN SO_RCVTIMEO
WARNF("%s (pid %d) is is canceling job", origname, client->pid);
goto HangupClientAndTerminateJob;
}
2023-09-10 15:12:43 +00:00
WARNF("client ssl read failed with -0x%04x (%s) so killing %s",
2023-09-12 15:58:57 +00:00
-received, GetTlsError(received), origname);
2023-09-10 15:12:43 +00:00
goto TerminateJob;
2023-08-08 03:22:49 +00:00
}
if (fds[1].revents) {
2023-09-10 15:12:43 +00:00
char buf[512];
ssize_t got = read(client->pipe[0], buf, sizeof(buf));
if (got == -1) {
2023-09-12 15:58:57 +00:00
WARNF("got %s reading %s output", strerror(errno), origname);
2023-09-10 15:12:43 +00:00
goto HangupClientAndTerminateJob;
}
2023-08-08 03:22:49 +00:00
if (!got) {
2023-09-12 15:58:57 +00:00
VERBF("got eof reading %s output", origname);
2023-09-10 15:12:43 +00:00
Close(&client->pipe[0]);
2023-08-08 03:22:49 +00:00
break;
}
2023-09-12 15:58:57 +00:00
DEBUF("got %ld bytes reading %s output", got, origname);
2023-09-10 15:12:43 +00:00
appendd(&client->output, buf, got);
}
}
}
2023-09-10 15:12:43 +00:00
WaitAgain:
DEBUF("waitpid");
struct rusage rusage;
int wrc = wait4(client->pid, &wstatus, 0, &rusage);
if (wrc == -1) {
if (errno == EINTR) {
2023-09-12 15:58:57 +00:00
WARNF("waitpid interrupted; killing %s pid %d", origname, client->pid);
2023-09-10 15:12:43 +00:00
kill(client->pid, SIGINT);
goto WaitAgain;
}
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
if (errno == ECANCELED) {
WARNF("thread is canceled; killing %s pid %d", origname, client->pid);
kill(client->pid, SIGKILL);
goto WaitAgain;
}
2023-09-10 15:12:43 +00:00
WARNF("waitpid failed %m");
client->pid = 0;
goto HangupClientAndTerminateJob;
}
client->pid = 0;
int exitcode;
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
struct timespec ended = timespec_real();
int64_t micros = timespec_tomicros(timespec_sub(ended, started));
if (WIFEXITED(wstatus)) {
2021-08-26 04:35:58 +00:00
if (WEXITSTATUS(wstatus)) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
WARNF("%s on %s exited with $?=%d after %'ldµs", origname, g_hostname,
WEXITSTATUS(wstatus), micros);
appendf(&client->output, "------ %s %s $?=%d (0x%08x) %,ldµs ------\n",
g_hostname, origname, WEXITSTATUS(wstatus), wstatus, micros);
2021-08-26 04:35:58 +00:00
} else {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
INFOF("%s on %s exited with $?=%d after %'ldµs", origname, g_hostname,
WEXITSTATUS(wstatus), micros);
2021-08-26 04:35:58 +00:00
}
exitcode = WEXITSTATUS(wstatus);
2023-09-10 15:12:43 +00:00
} else if (WIFSIGNALED(wstatus)) {
if (WTERMSIG(wstatus) == SIGVTALRM && !client->output) {
free(client->output);
client->output = 0;
goto RetryOnEtxtbsyRaceCondition;
}
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
char sigbuf[21];
WARNF("%s on %s terminated after %'ldµs with %s", origname, g_hostname,
micros, strsignal_r(WTERMSIG(wstatus), sigbuf));
exitcode = 128 + WTERMSIG(wstatus);
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
appendf(&client->output, "------ %s %s $?=%s (0x%08x) %,ldµs ------\n",
g_hostname, origname, strsignal(WTERMSIG(wstatus)), wstatus,
micros);
2023-09-10 15:12:43 +00:00
} else {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
WARNF("%s on %s died after %'ldµs with wait status 0x%08x", origname,
g_hostname, micros, wstatus);
2023-09-10 15:12:43 +00:00
exitcode = 127;
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
if (wstatus) {
AppendResourceReport(&client->output, &rusage, "\n");
PrintProgramOutput(client);
}
SendProgramOutput(client);
2021-08-07 20:22:35 +00:00
SendExitMessage(exitcode);
mbedtls_ssl_close_notify(&ezssl);
2023-09-12 15:58:57 +00:00
if (etxtbsy_tries > 1) {
WARNF("encountered %d ETXTBSY race conditions spawning %s",
etxtbsy_tries - 1, origname);
2023-09-10 15:12:43 +00:00
}
pthread_exit(0);
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
void HandleClient(void) {
struct stat st;
2023-09-10 15:12:43 +00:00
struct Client *client;
client = calloc(1, sizeof(struct Client));
client->addrsize = sizeof(client->addr);
for (;;) {
if (g_interrupted) {
Make improvements - Every unit test now passes on Apple Silicon. The final piece of this puzzle was porting our POSIX threads cancelation support, since that works differently on ARM64 XNU vs. AMD64. Our semaphore support on Apple Silicon is also superior now compared to AMD64, thanks to the grand central dispatch library which lets *NSYNC locks go faster. - The Cosmopolitan runtime is now more stable, particularly on Windows. To do this, thread local storage is mandatory at all runtime levels, and the innermost packages of the C library is no longer being built using ASAN. TLS is being bootstrapped with a 128-byte TIB during the process startup phase, and then later on the runtime re-allocates it either statically or dynamically to support code using _Thread_local. fork() and execve() now do a better job cooperating with threads. We can now check how much stack memory is left in the process or thread when functions like kprintf() / execve() etc. call alloca(), so that ENOMEM can be raised, reduce a buffer size, or just print a warning. - POSIX signal emulation is now implemented the same way kernels do it with pthread_kill() and raise(). Any thread can interrupt any other thread, regardless of what it's doing. If it's blocked on read/write then the killer thread will cancel its i/o operation so that EINTR can be returned in the mark thread immediately. If it's doing a tight CPU bound operation, then that's also interrupted by the signal delivery. Signal delivery works now by suspending a thread and pushing context data structures onto its stack, and redirecting its execution to a trampoline function, which calls SetThreadContext(GetCurrentThread()) when it's done. - We're now doing a better job managing locks and handles. On NetBSD we now close semaphore file descriptors in forked children. Semaphores on Windows can now be canceled immediately, which means mutexes/condition variables will now go faster. Apple Silicon semaphores can be canceled too. We're now using Apple's pthread_yield() funciton. Apple _nocancel syscalls are now used on XNU when appropriate to ensure pthread_cancel requests aren't lost. The MbedTLS library has been updated to support POSIX thread cancelations. See tool/build/runitd.c for an example of how it can be used for production multi-threaded tls servers. Handles on Windows now leak less often across processes. All i/o operations on Windows are now overlapped, which means file pointers can no longer be inherited across dup() and fork() for the time being. - We now spawn a thread on Windows to deliver SIGCHLD and wakeup wait4() which means, for example, that posix_spawn() now goes 3x faster. POSIX spawn is also now more correct. Like Musl, it's now able to report the failure code of execve() via a pipe although our approach favors using shared memory to do that on systems that have a true vfork() function. - We now spawn a thread to deliver SIGALRM to threads when setitimer() is used. This enables the most precise wakeups the OS makes possible. - The Cosmopolitan runtime now uses less memory. On NetBSD for example, it turned out the kernel would actually commit the PT_GNU_STACK size which caused RSS to be 6mb for every process. Now it's down to ~4kb. On Apple Silicon, we reduce the mandatory upstream thread size to the smallest possible size to reduce the memory overhead of Cosmo threads. The examples directory has a program called greenbean which can spawn a web server on Linux with 10,000 worker threads and have the memory usage of the process be ~77mb. The 1024 byte overhead of POSIX-style thread-local storage is now optional; it won't be allocated until the pthread_setspecific/getspecific functions are called. On Windows, the threads that get spawned which are internal to the libc implementation use reserve rather than commit memory, which shaves a few hundred kb. - sigaltstack() is now supported on Windows, however it's currently not able to be used to handle stack overflows, since crash signals are still generated by WIN32. However the crash handler will still switch to the alt stack, which is helpful in environments with tiny threads. - Test binaries are now smaller. Many of the mandatory dependencies of the test runner have been removed. This ensures many programs can do a better job only linking the the thing they're testing. This caused the test binaries for LIBC_FMT for example, to decrease from 200kb to 50kb - long double is no longer used in the implementation details of libc, except in the APIs that define it. The old code that used long double for time (instead of struct timespec) has now been thoroughly removed. - ShowCrashReports() is now much tinier in MODE=tiny. Instead of doing backtraces itself, it'll just print a command you can run on the shell using our new `cosmoaddr2line` program to view the backtrace. - Crash report signal handling now works in a much better way. Instead of terminating the process, it now relies on SA_RESETHAND so that the default SIG_IGN behavior can terminate the process if necessary. - Our pledge() functionality has now been fully ported to AARCH64 Linux.
2023-09-19 03:44:45 +00:00
pthread_cancel(0);
2023-09-10 15:12:43 +00:00
free(client);
return;
}
// poll() because we use SA_RESTART and accept() is @restartable
if (poll(&(struct pollfd){g_servfd, POLLIN}, 1, -1) > 0) {
client->fd = accept4(g_servfd, (struct sockaddr *)&client->addr,
&client->addrsize, SOCK_CLOEXEC);
if (client->fd != -1) {
VERBF("accepted client fd %d", client->fd);
break;
} else if (errno != EINTR && errno != EAGAIN) {
WARNF("accept4 failed %m");
}
} else if (errno != EINTR && errno != EAGAIN) {
WARNF("poll failed %m");
}
2020-06-15 14:18:57 +00:00
}
if (fstat(2, &st) != -1 && st.st_size > kLogMaxBytes) {
ftruncate(2, 0); // auto rotate log
}
2023-09-10 15:12:43 +00:00
sigset_t mask;
pthread_attr_t attr;
sigfillset(&mask);
pthread_attr_init(&attr);
pthread_attr_setsigmask_np(&attr, &mask);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_create(&client->th, &attr, ClientWorker, client);
pthread_attr_destroy(&attr);
2020-06-15 14:18:57 +00:00
}
int Serve(void) {
2023-09-10 15:12:43 +00:00
sigset_t mask;
2020-06-15 14:18:57 +00:00
StartTcpServer();
2023-09-10 15:12:43 +00:00
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
signal(SIGINT, OnInterrupt);
sigprocmask(SIG_BLOCK, &mask, 0);
while (!g_interrupted) {
HandleClient();
2020-06-15 14:18:57 +00:00
}
2023-09-10 15:12:43 +00:00
if (g_interrupted) {
WARNF("got ctrl-c, shutting down...");
}
2023-09-10 15:12:43 +00:00
WARNF("server exiting");
close(g_servfd);
return 0;
2020-06-15 14:18:57 +00:00
}
void Daemonize(void) {
if (fork() > 0)
_exit(0);
2020-06-15 14:18:57 +00:00
setsid();
if (fork() > 0)
_exit(0);
dup2(g_bogusfd, 0);
if (!g_sendready)
dup2(g_bogusfd, 1);
close(2);
open(kLogFile, O_CREAT | O_WRONLY | O_APPEND | O_CLOEXEC, 0644);
extern long __klog_handle;
if (__klog_handle > 0) {
close(__klog_handle);
}
__klog_handle = 2;
2020-06-15 14:18:57 +00:00
}
int main(int argc, char *argv[]) {
GetOpts(argc, argv);
2023-09-10 15:12:43 +00:00
g_psk = GetRunitPsk();
signal(SIGPIPE, SIG_IGN);
setenv("TZ", "PST", true);
gethostname(g_hostname, sizeof(g_hostname));
for (int i = 3; i < 16; ++i)
close(i);
2022-04-29 13:06:23 +00:00
errno = 0;
// poll()'ing /dev/null stdin file descriptor on xnu returns POLLNVAL?!
if (IsWindows()) {
2023-09-10 15:12:43 +00:00
g_bogusfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
} else {
2023-09-10 15:12:43 +00:00
g_bogusfd = open("/dev/zero", O_RDONLY | O_CLOEXEC);
}
2023-09-10 15:12:43 +00:00
mkdir("o", 0700);
if (g_daemonize)
Daemonize();
2023-09-10 15:12:43 +00:00
Serve();
free(g_psk);
#ifdef MODE_DBG
2023-09-10 15:12:43 +00:00
CheckForMemoryLeaks();
CheckForFileLeaks();
2023-09-10 15:12:43 +00:00
#endif
pthread_exit(0);
2020-06-15 14:18:57 +00:00
}