Build tool for hunting down flakes

This commit is contained in:
Justine Tunney 2024-12-24 11:34:42 -08:00
parent 93e22c581f
commit 2de3845b25
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
10 changed files with 78 additions and 131 deletions

View file

@ -35,9 +35,8 @@ void shm_path_np(const char *name, char buf[hasatleast 78]) {
const char *a;
uint8_t digest[BLAKE2B256_DIGEST_LENGTH];
a = "/tmp/", n = 5;
if (IsLinux() && isdirectory("/dev/shm")) {
if (IsLinux() && isdirectory("/dev/shm"))
a = "/dev/shm/", n = 9;
}
BLAKE2B256(name, strlen(name), digest);
p = mempcpy(buf, a, n);
p = hexpcpy(p, digest, BLAKE2B256_DIGEST_LENGTH);

View file

@ -242,7 +242,6 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex,
*
* - `PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP`
* - `PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP`
* - `PTHREAD_NORMAL_MUTEX_INITIALIZER_NP`
*
* Locking a mutex that's already locked by the calling thread will make

View file

@ -52,9 +52,6 @@ COSMOPOLITAN_C_START_
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_RECURSIVE}
#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {0, PTHREAD_MUTEX_ERRORCHECK}
#define PTHREAD_SIGNAL_SAFE_MUTEX_INITIALIZER_NP \
{0, PTHREAD_MUTEX_RECURSIVE | PTHREAD_PROCESS_SHARED}
#ifndef __cplusplus
#define _PTHREAD_ATOMIC(x) _Atomic(x)
#else

View file

@ -29,6 +29,7 @@
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
@ -104,12 +105,14 @@ done:
}
TEST(cachestat, testCachestatShmem) {
char name[64];
sprintf(name, "/cachestat_test-%ld", _rand64());
size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages.
size_t compute_len = 512 * pagesize;
unsigned long num_pages = compute_len / pagesize;
char *data = gc(xmalloc(filesize));
ASSERT_SYS(0, filesize, getrandom(data, filesize, 0));
ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 3, shm_open(name, O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 0, ftruncate(3, filesize));
ASSERT_SYS(0, filesize, write(3, data, filesize));
struct cachestat_range range = {pagesize, compute_len};
@ -117,6 +120,6 @@ TEST(cachestat, testCachestatShmem) {
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted,
"total number of cached and evicted pages is off.\n");
ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat"));
ASSERT_SYS(0, 0, shm_unlink(name));
ASSERT_SYS(0, 0, close(3));
}

View file

@ -56,9 +56,8 @@ int threadid;
void WorkerQuit(int sig, siginfo_t *si, void *ctx) {
ASSERT_EQ(SIGILL, sig);
if (!IsXnu() && !IsOpenbsd()) {
if (!IsXnu() && !IsOpenbsd())
ASSERT_EQ(SI_TKILL, si->si_code);
}
ASSERT_EQ(threadid, gettid());
}

View file

@ -9,6 +9,7 @@
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
@ -18,7 +19,6 @@
#include "libc/sysv/consts/sig.h"
#include "libc/thread/semaphore.h"
#define SHM_PATH "/fc7261622dd420d8"
#define STRING_SEND "hello"
#define STRING_RECV "HELLO"
@ -29,13 +29,14 @@ struct shmbuf {
char buf[256]; /* Data being transferred */
};
char shm_path[64];
atomic_bool *ready;
wontreturn void Bouncer(void) {
/* Create shared memory object and set its size to the size
of our structure. */
int fd = shm_open(SHM_PATH, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
int fd = shm_open(shm_path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("shm_open(bouncer)");
exit(1);
@ -96,7 +97,7 @@ wontreturn void Sender(void) {
/* Open the existing shared memory object and map it
into the caller's address space. */
int fd = shm_open(SHM_PATH, O_RDWR, 0);
int fd = shm_open(shm_path, O_RDWR, 0);
if (fd == -1) {
perror("shm_open(sender)");
exit(1);
@ -136,7 +137,7 @@ wontreturn void Sender(void) {
/* Unlink the shared memory object. Even if the peer process
is still using the object, this is okay. The object will
be removed only after all open references are closed. */
if (shm_unlink(SHM_PATH)) {
if (shm_unlink(shm_path)) {
if (IsWindows() && errno == EACCES) {
// TODO(jart): Make unlink() work better on Windows.
} else {
@ -154,7 +155,7 @@ int pid2;
void OnExit(void) {
kill(pid1, SIGKILL);
kill(pid2, SIGKILL);
shm_unlink(SHM_PATH);
shm_unlink(shm_path);
}
void OnTimeout(int sig) {
@ -164,6 +165,9 @@ void OnTimeout(int sig) {
int main(int argc, char *argv[]) {
// create random shared memory name
sprintf(shm_path, "/shm_open_test-%ld", _rand64());
// create synchronization object
ready = _mapshared(1);

View file

@ -1,114 +0,0 @@
// Copyright 2024 Justine Alexandra Roberts Tunney
//
// Permission to use, copy, modify, and/or distribute this software for
// any purpose with or without fee is hereby granted, provided that the
// above copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#include <cosmo.h>
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
// tests that recursive mutexes are implemented atomically
//
// glibc fails this test
// musl passes this test
// cosmo only guarantees this in process-shared non-debug mode
atomic_bool done;
atomic_bool ready;
pthread_mutex_t lock;
// Signal handler installed for SIGUSR1 by main().
//
// Takes the global mutex and releases it again right away. The process
// exits with status 50 when locking fails and 51 when unlocking fails.
void hand(int sig) {
  int rc;
  rc = pthread_mutex_lock(&lock);
  if (rc != 0)
    _Exit(50);
  rc = pthread_mutex_unlock(&lock);
  if (rc != 0)
    _Exit(51);
}
// Thread entry point used by main().
//
// Announces itself through `ready`, then keeps locking and unlocking the
// global mutex until `done` becomes true. The process exits with status
// 60 when locking fails and 61 when unlocking fails. Always returns 0.
void* work(void* arg) {
  ready = true;
  for (;;) {
    if (done)
      break;
    if (pthread_mutex_lock(&lock) != 0)
      _Exit(60);
    if (pthread_mutex_unlock(&lock) != 0)
      _Exit(61);
  }
  return 0;
}
// Test driver.
//
// Sets up a recursive, process-shared mutex, starts a worker thread that
// locks and unlocks it in a loop, and repeatedly signals that thread with
// SIGUSR1, whose handler locks and unlocks the same mutex. Every setup or
// teardown step that fails terminates the process with its own exit code.
int main() {

  // environments in which the test is skipped
  if (IsQemuUser()) {
    // qemu is believed to be the one at fault
    kprintf("mutex_async_signal_safety_test flakes on qemu\n");
    return 0;
  }
  if (IsModeDbg()) {
    // the deadlock detector gets in the way of our glorious spin lock
    kprintf("mutex_async_signal_safety_test not feasible in debug mode\n");
    return 0;
  }

  // install the SIGUSR1 handler
  struct sigaction sa;
  sa.sa_handler = hand;
  sa.sa_flags = SA_NODEFER;
  sigemptyset(&sa.sa_mask);
  if (sigaction(SIGUSR1, &sa, 0) != 0)
    _Exit(1);

  // create the recursive process-shared mutex
  pthread_mutexattr_t attr;
  if (pthread_mutexattr_init(&attr) != 0)
    _Exit(2);
  if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) != 0)
    _Exit(3);
  if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0)
    _Exit(3);
  if (pthread_mutex_init(&lock, &attr) != 0)
    _Exit(4);
  if (pthread_mutexattr_destroy(&attr) != 0)
    _Exit(5);

  // start the worker with an 8 MiB stack and a 64 KiB guard region
  pthread_t th;
  pthread_attr_t tattr;
  if (pthread_attr_init(&tattr) != 0)
    _Exit(6);
  if (pthread_attr_setstacksize(&tattr, 8 * 1024 * 1024) != 0)
    _Exit(7);
  if (pthread_attr_setguardsize(&tattr, 64 * 1024) != 0)
    _Exit(8);
  if (pthread_create(&th, &tattr, work, 0) != 0)
    _Exit(9);
  if (pthread_attr_destroy(&tattr) != 0)
    _Exit(10);

  // spin until the worker has announced itself
  while (!ready) {
  }

  // send the worker two back-to-back signals, one hundred times over
  for (int round = 0; round < 100; ++round) {
    if (pthread_kill(th, SIGUSR1) != 0)
      _Exit(11);
    if (pthread_kill(th, SIGUSR1) != 0)
      _Exit(12);
    usleep(1);
  }

  // stop the worker and tear everything down
  done = true;
  if (pthread_join(th, 0) != 0)
    _Exit(13);
  if (pthread_mutex_destroy(&lock) != 0)
    _Exit(14);
}

View file

@ -138,8 +138,8 @@ o/$(MODE)/tool/build/dso/dlopen_helper.so: \
o/$(MODE)/tool/build/dso/dlopen_helper.o \
$(OUTPUT_OPTION)
o/$(MODE)/tool/build/dlopen_test.runs: \
o/$(MODE)/tool/build/dlopen_test \
o/$(MODE)/tool/build/dlopen_tester.runs: \
o/$(MODE)/tool/build/dlopen_tester \
o/$(MODE)/tool/build/dso/dlopen_helper.so
$< o/$(MODE)/tool/build/dso/dlopen_helper.so

60
tool/scripts/flakes Executable file
View file

@ -0,0 +1,60 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
import concurrent.futures
from collections import Counter
from typing import List, Dict, Tuple
# Default number of runs per test: 1.5x the number of CPUs.
# os.cpu_count() returns None when the count cannot be determined, which
# would make the multiplication raise TypeError, so fall back to one CPU.
NUM_PARALLEL = int((os.cpu_count() or 1) * 1.5)
def find_test_files(root_dir: str) -> List[str]:
    """Collect executable files named ``*_test`` beneath ``root_dir``.

    The tree is walked recursively with ``os.walk``. A file is kept when
    its name ends with ``_test`` and ``os.access`` reports it executable.

    Args:
        root_dir: Directory to search.

    Returns:
        Paths of the matching files, in the order ``os.walk`` yields them.
    """
    found: List[str] = []
    for parent, _subdirs, names in os.walk(root_dir):
        candidates = (
            os.path.join(parent, name)
            for name in names
            if name.endswith('_test')
        )
        found.extend(path for path in candidates if os.access(path, os.X_OK))
    return found
def run_single_test(test_path: str) -> int:
    """Execute one test binary and report how it exited.

    The program is started without arguments and its output is not
    captured, so it goes wherever this process's output goes.

    Args:
        test_path: Path of the executable to run.

    Returns:
        The child's return code, or -1 if running it raised an exception
        (the error is printed in that case).
    """
    try:
        completed = subprocess.run([test_path], capture_output=False)
    except Exception as e:
        print(f"Error running {test_path}: {e}")
        return -1
    else:
        return completed.returncode
def run_test_multiple_times(test_path: str, iterations: int = NUM_PARALLEL) -> List[int]:
    """Run a test ``iterations`` times concurrently and collect exit codes.

    One worker thread is created per run so that every run is in flight at
    the same time. A default-sized process pool would cap concurrency at
    the CPU count, below the default of 1.5x CPUs. Each worker only blocks
    on a child process, so threads are sufficient here.

    Args:
        test_path: Path of the test executable.
        iterations: Number of simultaneous runs; defaults to NUM_PARALLEL.

    Returns:
        One exit code per run, in completion order. Empty when
        ``iterations`` is not positive.
    """
    if iterations <= 0:
        # An executor cannot be built with zero workers; nothing to run.
        return []
    with concurrent.futures.ThreadPoolExecutor(max_workers=iterations) as executor:
        futures = [
            executor.submit(run_single_test, test_path)
            for _ in range(iterations)
        ]
        return [f.result() for f in concurrent.futures.as_completed(futures)]
def analyze_results(test_path: str, exit_codes: List[int]) -> Tuple[bool, Dict[int, int]]:
    """Summarise the exit codes gathered for one test.

    Args:
        test_path: Path of the test the codes belong to (not used in the
            computation).
        exit_codes: Exit code of every run.

    Returns:
        A pair ``(flaked, distribution)``. ``distribution`` maps each
        non-zero exit code to how often it occurred, and ``flaked`` is
        True when at least one run exited non-zero.
    """
    failures: Dict[int, int] = {}
    for code in exit_codes:
        if code == 0:
            continue
        failures[code] = failures.get(code, 0) + 1
    return len(failures) > 0, failures
def print_flaky_report(test_path: str, error_distribution: Dict[int, int], total_runs: int):
    """Write a short summary of a flaky test to standard output.

    Args:
        test_path: Path of the test that flaked.
        error_distribution: Mapping of non-zero exit code to occurrence count.
        total_runs: Total number of runs the counts are measured against.
    """
    lines = [f"{test_path} flaked!"]
    lines.extend(
        f"* {count}/{total_runs} processes died with exit code {exit_code}"
        for exit_code, count in error_distribution.items()
    )
    print("\n".join(lines))
def main(directory = "o"):
    """Hunt for flaky tests under ``directory``.

    Every discovered test is run many times concurrently. As soon as one
    test produces a non-zero exit code, a report is printed and the script
    exits with status 1. If no test flakes, the function simply returns.

    Args:
        directory: Root of the tree to scan for ``*_test`` executables.
    """
    test_files = find_test_files(directory)
    total = len(test_files)
    # Count from 1 so the progress reads [1/N] .. [N/N], not [0/N] .. [N-1/N].
    for position, test_path in enumerate(test_files, start=1):
        print("testing [%d/%d] %s..." % (position, total, test_path))
        # Flush so the progress line precedes the child processes' output.
        sys.stdout.flush()
        exit_codes = run_test_multiple_times(test_path)
        is_flaky, error_distribution = analyze_results(test_path, exit_codes)
        if is_flaky:
            print_flaky_report(test_path, error_distribution, len(exit_codes))
            sys.exit(1)
# Script entry point: command-line arguments are passed to main() positionally.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)