cosmopolitan/tool/scripts/flakes
Justine Tunney e939659b70
Fix ordering of pthread_create(pthread_t *thread)
This change fixes a bug where signal_latency_async_test would flake less
than 1/1000 of the time. What was happening was pthread_kill(sender_thr)
would return EFAULT. This was because pthread_create() was not returning
the thread object pointer until after clone() had been called. So it was
actually possible for the main thread to stall after calling clone() and
during that time the receiver would launch and receive a signal from the
sender thread, and then fail when it tried to send a pong. I thought I'd
use a barrier at first, in the test, to synchronize thread creation, but
I firmly believe that pthread_create() was to blame and now that's fixed
2025-01-03 17:34:29 -08:00

63 lines
2.5 KiB
Python
Executable file

#!/usr/bin/env python3
import os
import sys
import subprocess
import concurrent.futures
from collections import Counter
from typing import List, Dict, Tuple
# Default number of runs per test: 20 for every CPU reported by the OS.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to a single CPU instead of failing with a TypeError at import time.
NUM_PARALLEL = (os.cpu_count() or 1) * 20
def find_test_files(root: str) -> List[str]:
    """Collect the test executables to run.

    If *root* is a directory it is walked recursively, and every file whose
    name ends with ``_test`` and which the current user may execute is
    collected. Otherwise *root* itself is returned as the only entry when
    its path ends with ``_test``; no existence or permission check is made
    in that case.

    Args:
        root: Directory to search, or the path of a single test.

    Returns:
        The matching paths in sorted order, so that repeated invocations
        visit tests in the same sequence regardless of the order in which
        the filesystem lists directory entries.
    """
    if not os.path.isdir(root):
        return [root] if root.endswith('_test') else []

    test_files = []
    # Loop names are kept distinct from the ``root`` parameter so the
    # argument is never rebound while the walk is in progress.
    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith('_test'):
                continue
            file_path = os.path.join(dirpath, filename)
            if os.access(file_path, os.X_OK):
                test_files.append(file_path)
    test_files.sort()
    return test_files
def run_single_test(test_path: str) -> int:
    """Execute one test through the ``ape`` command and report its exit code.

    The child's output is not captured. If launching the process raises,
    the error is printed and -1 is returned instead of propagating.
    """
    command = ["ape", test_path]
    try:
        completed = subprocess.run(command, capture_output=False)
    except Exception as e:
        print(f"Error running {test_path}: {e}")
        return -1
    else:
        return completed.returncode
def run_test_multiple_times(test_path: str, iterations: int = NUM_PARALLEL) -> List[int]:
    """Submit *iterations* runs of one test to a process pool.

    Returns the exit codes in the order the runs finished, which is not
    necessarily the order in which they were submitted.
    """
    exit_codes: List[int] = []
    with concurrent.futures.ProcessPoolExecutor() as pool:
        pending = []
        for _ in range(iterations):
            pending.append(pool.submit(run_single_test, test_path))
        # as_completed yields each future as soon as its run is done.
        for finished in concurrent.futures.as_completed(pending):
            exit_codes.append(finished.result())
    return exit_codes
def analyze_results(test_path: str, exit_codes: List[int]) -> Tuple[bool, Dict[int, int]]:
    """Tally the non-zero exit codes of a batch of runs.

    Returns a pair: whether any run exited non-zero, and a mapping from each
    non-zero exit code to how many runs produced it. *test_path* is accepted
    but not consulted.
    """
    failures = Counter()
    for status in exit_codes:
        if status == 0:
            continue
        failures[status] += 1
    flaked = len(failures) > 0
    return flaked, dict(failures)
def print_flaky_report(test_path: str, error_distribution: Dict[int, int], total_runs: int):
    """Write a summary of a flaky test to standard output.

    Emits a headline naming the test, then one bullet per exit code showing
    how many of the *total_runs* processes ended with that code.
    """
    lines = [f"{test_path} flaked!"]
    for exit_code in error_distribution:
        count = error_distribution[exit_code]
        lines.append(f"* {count}/{total_runs} processes died with exit code {exit_code}")
    for line in lines:
        print(line)
def main(directory = "o"):
    """Run every discovered test repeatedly and stop at the first flaky one.

    Args:
        directory: Directory to search for ``*_test`` executables, or the
            path of a single test. Defaults to ``"o"``.

    For each test a progress line is printed, the test is run through
    run_test_multiple_times(), and if any run exited non-zero a report is
    printed and the process exits with status 1.
    """
    test_files = find_test_files(directory)
    total = len(test_files)
    # Count from 1 so progress reads [1/N] .. [N/N]; a zero-based index
    # would never display the final test as N of N.
    for position, test_path in enumerate(test_files, start=1):
        # Flush so the progress line appears before the child processes
        # start writing their own output.
        print("testing [%d/%d] %s..." % (position, total, test_path), flush=True)
        exit_codes = run_test_multiple_times(test_path)
        is_flaky, error_distribution = analyze_results(test_path, exit_codes)
        if is_flaky:
            print_flaky_report(test_path, error_distribution, len(exit_codes))
            sys.exit(1)
# Script entry point: forward the command-line arguments (excluding the
# program name) to main() as positional parameters.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)