Build tool for hunting down flakes

This commit is contained in:
Justine Tunney 2024-12-24 11:34:42 -08:00
parent 93e22c581f
commit 2de3845b25
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
10 changed files with 78 additions and 131 deletions

View file

@ -35,9 +35,8 @@ void shm_path_np(const char *name, char buf[hasatleast 78]) {
const char *a; const char *a;
uint8_t digest[BLAKE2B256_DIGEST_LENGTH]; uint8_t digest[BLAKE2B256_DIGEST_LENGTH];
a = "/tmp/", n = 5; a = "/tmp/", n = 5;
if (IsLinux() && isdirectory("/dev/shm")) { if (IsLinux() && isdirectory("/dev/shm"))
a = "/dev/shm/", n = 9; a = "/dev/shm/", n = 9;
}
BLAKE2B256(name, strlen(name), digest); BLAKE2B256(name, strlen(name), digest);
p = mempcpy(buf, a, n); p = mempcpy(buf, a, n);
p = hexpcpy(p, digest, BLAKE2B256_DIGEST_LENGTH); p = hexpcpy(p, digest, BLAKE2B256_DIGEST_LENGTH);

View file

@ -242,7 +242,6 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex,
* *
* - `PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP` * - `PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP` * - `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP`
* - `PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_NORMAL_MUTEX_INITIALIZER_NP` * - `PTHREAD_NORMAL_MUTEX_INITIALIZER_NP`
* *
* Locking a mutex that's already locked by the calling thread will make * Locking a mutex that's already locked by the calling thread will make

View file

@ -52,9 +52,6 @@ COSMOPOLITAN_C_START_
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_RECURSIVE} #define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_RECURSIVE}
#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_ERRORCHECK} #define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_ERRORCHECK}
#define PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP \
{0, PTHREAD_MUTEX_RECURSIVE | PTHREAD_PROCESS_SHARED}
#ifndef __cplusplus #ifndef __cplusplus
#define _PTHREAD_ATOMIC(x) _Atomic(x) #define _PTHREAD_ATOMIC(x) _Atomic(x)
#else #else

View file

@ -29,6 +29,7 @@
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h" #include "libc/runtime/sysconf.h"
#include "libc/stdio/rand.h" #include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/auxv.h" #include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/o.h" #include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
@ -104,12 +105,14 @@ done:
} }
TEST(cachestat, testCachestatShmem) { TEST(cachestat, testCachestatShmem) {
char name[64];
sprintf(name, "/cachestat_test-%ld", _rand64());
size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages. size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages.
size_t compute_len = 512 * pagesize; size_t compute_len = 512 * pagesize;
unsigned long num_pages = compute_len / pagesize; unsigned long num_pages = compute_len / pagesize;
char *data = gc(xmalloc(filesize)); char *data = gc(xmalloc(filesize));
ASSERT_SYS(0, filesize, getrandom(data, filesize, 0)); ASSERT_SYS(0, filesize, getrandom(data, filesize, 0));
ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600)); ASSERT_SYS(0, 3, shm_open(name, O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 0, ftruncate(3, filesize)); ASSERT_SYS(0, 0, ftruncate(3, filesize));
ASSERT_SYS(0, filesize, write(3, data, filesize)); ASSERT_SYS(0, filesize, write(3, data, filesize));
struct cachestat_range range = {pagesize, compute_len}; struct cachestat_range range = {pagesize, compute_len};
@ -117,6 +120,6 @@ TEST(cachestat, testCachestatShmem) {
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted, ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted,
"total number of cached and evicted pages is off.\n"); "total number of cached and evicted pages is off.\n");
ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat")); ASSERT_SYS(0, 0, shm_unlink(name));
ASSERT_SYS(0, 0, close(3)); ASSERT_SYS(0, 0, close(3));
} }

View file

@ -56,9 +56,8 @@ int threadid;
void WorkerQuit(int sig, siginfo_t *si, void *ctx) { void WorkerQuit(int sig, siginfo_t *si, void *ctx) {
ASSERT_EQ(SIGILL, sig); ASSERT_EQ(SIGILL, sig);
if (!IsXnu() && !IsOpenbsd()) { if (!IsXnu() && !IsOpenbsd())
ASSERT_EQ(SI_TKILL, si->si_code); ASSERT_EQ(SI_TKILL, si->si_code);
}
ASSERT_EQ(threadid, gettid()); ASSERT_EQ(threadid, gettid());
} }

View file

@ -9,6 +9,7 @@
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h" #include "libc/stdio/stdio.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
@ -18,7 +19,6 @@
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/thread/semaphore.h" #include "libc/thread/semaphore.h"
#define SHM_PATH "/fc7261622dd420d8"
#define STRING_SEND "hello" #define STRING_SEND "hello"
#define STRING_RECV "HELLO" #define STRING_RECV "HELLO"
@ -29,13 +29,14 @@ struct shmbuf {
char buf[256]; /* Data being transferred */ char buf[256]; /* Data being transferred */
}; };
char shm_path[64];
atomic_bool *ready; atomic_bool *ready;
wontreturn void Bouncer(void) { wontreturn void Bouncer(void) {
/* Create shared memory object and set its size to the size /* Create shared memory object and set its size to the size
of our structure. */ of our structure. */
int fd = shm_open(SHM_PATH, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); int fd = shm_open(shm_path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) { if (fd == -1) {
perror("shm_open(bouncer)"); perror("shm_open(bouncer)");
exit(1); exit(1);
@ -96,7 +97,7 @@ wontreturn void Sender(void) {
/* Open the existing shared memory object and map it /* Open the existing shared memory object and map it
into the caller's address space. */ into the caller's address space. */
int fd = shm_open(SHM_PATH, O_RDWR, 0); int fd = shm_open(shm_path, O_RDWR, 0);
if (fd == -1) { if (fd == -1) {
perror("shm_open(sender)"); perror("shm_open(sender)");
exit(1); exit(1);
@ -136,7 +137,7 @@ wontreturn void Sender(void) {
/* Unlink the shared memory object. Even if the peer process /* Unlink the shared memory object. Even if the peer process
is still using the object, this is okay. The object will is still using the object, this is okay. The object will
be removed only after all open references are closed. */ be removed only after all open references are closed. */
if (shm_unlink(SHM_PATH)) { if (shm_unlink(shm_path)) {
if (IsWindows() && errno == EACCES) { if (IsWindows() && errno == EACCES) {
// TODO(jart): Make unlink() work better on Windows. // TODO(jart): Make unlink() work better on Windows.
} else { } else {
@ -154,7 +155,7 @@ int pid2;
void OnExit(void) { void OnExit(void) {
kill(pid1, SIGKILL); kill(pid1, SIGKILL);
kill(pid2, SIGKILL); kill(pid2, SIGKILL);
shm_unlink(SHM_PATH); shm_unlink(shm_path);
} }
void OnTimeout(int sig) { void OnTimeout(int sig) {
@ -164,6 +165,9 @@ void OnTimeout(int sig) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
// create random shared memory name
sprintf(shm_path, "/shm_open_test-%ld", _rand64());
// create synchronization object // create synchronization object
ready = _mapshared(1); ready = _mapshared(1);

View file

@ -1,114 +0,0 @@
// Copyright 2024 Justine Alexandra Roberts Tunney
//
// Permission to use, copy, modify, and/or distribute this software for
// any purpose with or without fee is hereby granted, provided that the
// above copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#include <cosmo.h>
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
// tests that recursive mutexes are implemented atomically
//
// glibc fails this test
// musl passes this test
// cosmo only guarantees this in process-shared non-debug mode
atomic_bool done;
atomic_bool ready;
pthread_mutex_t lock;
// SIGUSR1 handler: takes the global `lock` and releases it again.
// main() installs it with SA_NODEFER, so SIGUSR1 is not blocked while
// the handler runs and the handler may itself be interrupted by another
// SIGUSR1 at any point in the lock/unlock sequence.
// Terminates the process with status 50 if locking fails and with
// status 51 if unlocking fails. `sig` is unused.
void hand(int sig) {
if (pthread_mutex_lock(&lock))
_Exit(50);
if (pthread_mutex_unlock(&lock))
_Exit(51);
}
// Worker thread entry point. Sets `ready` so main() knows the thread is
// running, then locks and unlocks the global `lock` in a tight loop
// until main() sets `done`. This is the thread that main() interrupts
// with SIGUSR1, so hand() runs on top of this loop.
// Terminates the process with status 60 if locking fails and with
// status 61 if unlocking fails. `arg` is unused; always returns 0.
void* work(void* arg) {
ready = true;
while (!done) {
if (pthread_mutex_lock(&lock))
_Exit(60);
if (pthread_mutex_unlock(&lock))
_Exit(61);
}
return 0;
}
// Test driver. Sets up a recursive, process-shared mutex and a worker
// thread that hammers it, then repeatedly signals that thread so the
// SIGUSR1 handler locks the same mutex from signal context. Every
// failing step exits with its own status code (1-14) so the failing
// call can be identified from the exit status alone.
int main() {
if (IsQemuUser()) {
// qemu is believed to be the one at fault
kprintf("mutex_async_signal_safety_test flakes on qemu\n");
return 0;
}
if (IsModeDbg()) {
// the deadlock detector gets in the way of our glorious spin lock
kprintf("mutex_async_signal_safety_test not feasible in debug mode\n");
return 0;
}
// install hand() for SIGUSR1; SA_NODEFER leaves SIGUSR1 unblocked
// while the handler runs, so handler invocations can nest
struct sigaction sa;
sa.sa_handler = hand;
sa.sa_flags = SA_NODEFER;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGUSR1, &sa, 0))
_Exit(1);
// create the mutex under test: recursive and process-shared
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr))
_Exit(2);
if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE))
_Exit(3);
// NOTE(review): reuses exit status 3, so a setpshared failure is
// indistinguishable from the settype failure just above
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED))
_Exit(3);
if (pthread_mutex_init(&lock, &attr))
_Exit(4);
if (pthread_mutexattr_destroy(&attr))
_Exit(5);
// start the worker with an 8 MiB stack and a 64 KiB guard region
pthread_t th;
pthread_attr_t tattr;
if (pthread_attr_init(&tattr))
_Exit(6);
if (pthread_attr_setstacksize(&tattr, 8 * 1024 * 1024))
_Exit(7);
if (pthread_attr_setguardsize(&tattr, 64 * 1024))
_Exit(8);
if (pthread_create(&th, &tattr, work, 0))
_Exit(9);
if (pthread_attr_destroy(&tattr))
_Exit(10);
// busy-wait until the worker has announced that it is running
for (;;)
if (ready)
break;
// 100 rounds of two back-to-back SIGUSR1 deliveries to the worker,
// with a 1 microsecond sleep between rounds
for (int i = 0; i < 100; ++i) {
if (pthread_kill(th, SIGUSR1))
_Exit(11);
if (pthread_kill(th, SIGUSR1))
_Exit(12);
usleep(1);
}
// tell the worker to leave its loop, then clean up
done = true;
if (pthread_join(th, 0))
_Exit(13);
if (pthread_mutex_destroy(&lock))
_Exit(14);
}

View file

@ -138,8 +138,8 @@ o/$(MODE)/tool/build/dso/dlopen_helper.so: \
o/$(MODE)/tool/build/dso/dlopen_helper.o \ o/$(MODE)/tool/build/dso/dlopen_helper.o \
$(OUTPUT_OPTION) $(OUTPUT_OPTION)
o/$(MODE)/tool/build/dlopen_test.runs: \ o/$(MODE)/tool/build/dlopen_tester.runs: \
o/$(MODE)/tool/build/dlopen_test \ o/$(MODE)/tool/build/dlopen_tester \
o/$(MODE)/tool/build/dso/dlopen_helper.so o/$(MODE)/tool/build/dso/dlopen_helper.so
$< o/$(MODE)/tool/build/dso/dlopen_helper.so $< o/$(MODE)/tool/build/dso/dlopen_helper.so

60
tool/scripts/flakes Executable file
View file

@ -0,0 +1,60 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
import concurrent.futures
from collections import Counter
from typing import List, Dict, Tuple
# How many copies of each test are launched at once: 1.5x the CPU count.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to one CPU rather than failing with TypeError at import time, and never
# let the result drop below a single run.
NUM_PARALLEL = max(1, int((os.cpu_count() or 1) * 1.5))
def find_test_files(root_dir: str) -> List[str]:
    """Collect every executable file below *root_dir* whose name ends in ``_test``.

    The tree is walked recursively with ``os.walk``; results are returned in
    the order the walk produces them. A missing directory yields an empty list.
    """

    def named_like_tests():
        # Yield the full path of each file whose basename has the suffix.
        for parent, _subdirs, names in os.walk(root_dir):
            for name in names:
                if name.endswith('_test'):
                    yield os.path.join(parent, name)

    # Keep only the candidates the current user is allowed to execute.
    return [path for path in named_like_tests() if os.access(path, os.X_OK)]
def run_single_test(test_path: str) -> int:
    """Execute *test_path* once, with no arguments, and report how it ended.

    The child's output is not captured, so it goes wherever this process's
    own stdout/stderr go. Returns the child's return code, or -1 (after
    printing the error) when running it raised an exception.
    """
    try:
        completed = subprocess.run([test_path], capture_output=False)
    except Exception as e:
        print(f"Error running {test_path}: {e}")
        return -1
    else:
        return completed.returncode
def run_test_multiple_times(test_path: str, iterations: int = NUM_PARALLEL) -> List[int]:
    """Run *test_path* ``iterations`` times concurrently and collect the exit codes.

    Each run goes through ``run_single_test``. The returned list holds one
    exit code per run, in completion order (not submission order). A
    non-positive ``iterations`` returns an empty list.

    The pool is sized to ``iterations`` so that every run really is in flight
    at the same time; a default-sized pool is capped at the CPU count, which
    is smaller than the default ``iterations``. Threads are enough because
    each worker only blocks waiting for its child process.
    """
    if iterations <= 0:
        return []
    with concurrent.futures.ThreadPoolExecutor(max_workers=iterations) as executor:
        futures = [executor.submit(run_single_test, test_path) for _ in range(iterations)]
        return [f.result() for f in concurrent.futures.as_completed(futures)]
def analyze_results(test_path: str, exit_codes: List[int]) -> Tuple[bool, Dict[int, int]]:
    """Summarise the exit codes from repeated runs of one test.

    Returns ``(flaked, failures)`` where ``failures`` maps each non-zero exit
    code to the number of runs that produced it, and ``flaked`` is True when
    at least one run exited non-zero. ``test_path`` is accepted but not used.
    """
    failures: Dict[int, int] = {}
    for code in exit_codes:
        if code != 0:
            failures[code] = failures.get(code, 0) + 1
    return len(failures) > 0, failures
def print_flaky_report(test_path: str, error_distribution: Dict[int, int], total_runs: int):
    """Write a short report about a flaky test to stdout.

    Prints a headline naming *test_path*, then one bullet per entry of
    *error_distribution* (exit code -> number of runs) showing how many of the
    *total_runs* runs ended with that exit code.
    """
    report = [f"{test_path} flaked!"]
    report.extend(
        f"* {count}/{total_runs} processes died with exit code {exit_code}"
        for exit_code, count in error_distribution.items()
    )
    print("\n".join(report))
def main(directory: str = "o") -> None:
    """Hunt for flaky tests below *directory*.

    Every test found by ``find_test_files`` is run repeatedly in parallel via
    ``run_test_multiple_times``. As soon as one test has any non-zero exit
    code, a report is printed and the process exits with status 1. If no test
    flakes, the function returns normally.
    """
    test_files = find_test_files(directory)
    total = len(test_files)
    # Count from 1 so the progress line reads [1/N] .. [N/N], not [0/N] .. [N-1/N].
    for position, test_path in enumerate(test_files, start=1):
        # Flush so the progress line appears before the test's own output.
        print("testing [%d/%d] %s..." % (position, total, test_path), flush=True)
        exit_codes = run_test_multiple_times(test_path)
        is_flaky, error_distribution = analyze_results(test_path, exit_codes)
        if is_flaky:
            print_flaky_report(test_path, error_distribution, len(exit_codes))
            sys.exit(1)
# Script entry point: command-line arguments are passed to main() positionally,
# so an optional first argument replaces the default directory to search.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)